Merge branch 'develop' into fix-baidu-ku6

2025-02-11 12:42:29 +03:00 · 2016-05-25 16:38:48 -04:00 · 2016-05-25 16:38:48 -04:00 · 0791f566db
commit 0791f566db
parent 00d272b3d9 5f99e1dfd3
10 changed files with 158 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -360,6 +360,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | PPTV聚力 | <http://www.pptv.com/>         |✓| | |
 | 齐鲁网   | <http://v.iqilu.com/>          |✓| | |
 | QQ<br/>腾讯视频 | <http://v.qq.com/>      |✓| | |
+| 企鹅直播 | <http://live.qq.com/>          |✓| | |
 | 阡陌视频 | <http://qianmo.com/>           |✓| | |
 | THVideo  | <http://thvideo.tv/>           |✓| | |
 | Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | |
@ -373,6 +374,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 战旗TV   | <http://www.zhanqi.tv/lives>   |✓| | |
 | 央视网   | <http://www.cntv.cn/>          |✓| | |
 | 花瓣     | <http://huaban.com/>           | |✓| |
+| Naver<br/>네이버 | <http://tvcast.naver.com/>     |✓| | |
+| 芒果TV   | <http://www.mgtv.com/>         |✓| | |

 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@ -52,6 +52,7 @@ SITES = {
    'mixcloud'         : 'mixcloud',
    'mtv81'            : 'mtv81',
    'musicplayon'      : 'musicplayon',
+    'naver'            : 'naver',
    '7gogo'            : 'nanagogo',
    'nicovideo'        : 'nicovideo',
    'panda'            : 'panda',
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@ -45,6 +45,7 @@ from .mixcloud import *
 from .mtv81 import *
 from .musicplayon import *
 from .nanagogo import *
+from .naver import *
 from .netease import *
 from .nicovideo import *
 from .panda import *
@ -52,6 +53,7 @@ from .pinterest import *
 from .pixnet import *
 from .pptv import *
 from .qianmo import *
+from .qie import *
 from .qq import *
 from .sina import *
 from .sohu import *
--- a/src/you_get/extractors/embed.py
+++ b/src/you_get/extractors/embed.py
@ -8,6 +8,7 @@ from .netease import netease_download
 from .qq import qq_download_by_vid
 from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_id
+from .vimeo import vimeo_download_by_id
 from .yinyuetai import yinyuetai_download_by_id
 from .youku import youku_download_by_vid

@ -39,6 +40,9 @@ iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.sw

 netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]

+vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
+
+
 def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
    content = get_content(url, headers=fake_headers)
    found = False
@ -69,6 +73,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
        found = True
        netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

+    urls = matchall(content, vimeo_embed_patters)
+    for url in urls:
+        found = True
+        vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+
    if not found:
        raise NotImplementedError(url)

--- a/src/you_get/extractors/naver.py
+++ b/src/you_get/extractors/naver.py
@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+__all__ = ['naver_download']
+import urllib.request, urllib.parse
+from ..common import *
+
+def naver_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
+    """Download a tvcast.naver.com video, choosing the tallest encoding.
+
+    Scrapes the player's video id and inKey from the page, reads the title
+    from the videoInfo service and the list of encodings from the
+    playableEncodingOption service, prints the stream info and, unless
+    ``info_only`` is set, passes the chosen URL to ``download_urls``.
+
+    Raises AssertionError when ``url`` is not a tvcast video page, when the
+    player parameters cannot be found in the page, when the title is empty,
+    or when the service lists no encoding option.
+    """
+    assert re.search(r'http://tvcast.naver.com/v/', url), "URL is not supported"
+
+    html = get_html(url)
+    contentid = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', html)
+    # Fail with a readable message instead of AttributeError on a None match.
+    assert contentid, "Cannot find the player parameters in the page"
+    videoid, inkey = contentid.groups()
+
+    info_key = urllib.parse.urlencode({'vid': videoid, 'inKey': inkey})
+    down_key = urllib.parse.urlencode({'masterVid': videoid, 'protocol': 'p2p', 'inKey': inkey})
+
+    from xml.dom.minidom import parseString
+    inf_xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?%s' % info_key)
+    title = parseString(inf_xml).getElementsByTagName('Subject')[0].firstChild.data
+    assert title
+
+    xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?%s' % down_key)
+    doc = parseString(xml)
+
+    # Keep a running maximum so the tallest option wins. Comparing every
+    # option against the first <height> only (as before) left real_url empty
+    # when the first option was the tallest or the only one.
+    best_height = -1
+    real_url = ''
+    for node in doc.getElementsByTagName('EncodingOption'):
+        height = int(node.getElementsByTagName('height')[0].firstChild.data)
+        if height > best_height:
+            best_height = height
+            domain = node.getElementsByTagName('Domain')[0].firstChild.data
+            uri = node.getElementsByTagName('uri')[0].firstChild.data
+            real_url = domain + '/' + uri
+    assert real_url, "No playable stream found"
+
+    mime, ext, size = url_info(real_url)
+    print_info(site_info, title, mime, size)
+    if not info_only:
+        download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+# Site label that naver_download() passes to print_info().
+site_info = "tvcast.naver.com"
+# Module-level entry points; presumably looked up by the URL dispatcher in
+# common.py via the SITES table -- confirm.
+download = naver_download
+download_playlist = playlist_not_supported('naver')
--- a/src/you_get/extractors/qie.py
+++ b/src/you_get/extractors/qie.py
@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from ..common import *
+from ..extractor import VideoExtractor
+
+from json import loads
+
+class QiE(VideoExtractor):
+    """Extractor for QiE live rooms, backed by the qie.tv room API."""
+    name = "QiE （企鹅直播）"
+
+    # Last updated: 2015-11-24
+    # Stream ids this extractor knows about; prepare() registers the ones
+    # the room API actually reports, in this order.
+    stream_types = [
+        {'id': 'normal', 'container': 'flv', 'video_profile': '标清'},
+        {'id': 'middle', 'container': 'flv', 'video_profile': '550'},
+        {'id': 'middle2', 'container': 'flv', 'video_profile': '900'},
+    ]
+
+    # Maps a video_profile label to its stream id.
+    id_dic = {i['video_profile']:(i['id']) for i in stream_types}
+
+    api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}'
+
+    @staticmethod
+    def get_vid_from_url(url):
+        """Fetch the page at ``url`` and return the room id found in it."""
+        html = get_content(url)
+        return match1(html, r'room_id\":(\d+)')
+
+    def download_playlist_by_url(self, url, **kwargs):
+        """Do nothing; playlists are not handled by this extractor."""
+        pass
+
+    def prepare(self, **kwargs):
+        """Query the room API and fill in ``self.title`` and ``self.streams``."""
+        if self.url:
+            self.vid = self.get_vid_from_url(self.url)
+
+        content = loads(get_content(self.api_endpoint.format(room_id = self.vid)))
+        data = content['data']
+        self.title = data['room_name']
+        rtmp_url = data['rtmp_url']
+
+        stream_available = {'normal': rtmp_url + '/' + data['rtmp_live']}
+        # Only iterate when the field is non-empty. A truthiness test also
+        # tolerates None, on which the previous len() check raised TypeError.
+        multi_bitrate = data['rtmp_multi_bitrate']
+        if multi_bitrate:
+            for k, v in multi_bitrate.items():
+                stream_available[k] = rtmp_url + '/' + v
+
+        for s in self.stream_types:
+            quality_id = s['id']
+            if quality_id in stream_available:
+                self.streams[quality_id] = {
+                    'container': 'flv',
+                    'video_profile': s['video_profile'],
+                    'size': 0,
+                    'url': stream_available[quality_id]
+                }
+
+    def extract(self, **kwargs):
+        """Set each stream's ``src`` list and validate a requested ``stream_id``.
+
+        Exits the process with status 2 when ``stream_id`` is given but is
+        not one of the prepared streams.
+        """
+        for i in self.streams:
+            self.streams[i]['src'] = [self.streams[i]['url']]
+
+        stream_id = kwargs.get('stream_id')
+        if stream_id:
+            if stream_id not in self.streams:
+                log.e('[Error] Invalid video format.')
+                log.e('Run \'-i\' command with no specific video format to view all available formats.')
+                exit(2)
+        else:
+            # Every stream already got its 'src' in the loop above, so the
+            # old re-assignment through the leaked loop variable is gone.
+            # NOTE(review): the looked-up id is not used in this method; the
+            # lookup is kept so behavior with an empty stream list is
+            # unchanged -- confirm against VideoExtractor.
+            stream_id = self.streams_sorted[0]['id']
+
+# Shared extractor instance; its bound download_by_url is the module's entry
+# point (qq.py imports it as qieDownload for live.qq.com URLs).
+site = QiE()
+download = site.download_by_url
+download_playlist = playlist_not_supported('QiE')
--- a/src/you_get/extractors/qq.py
+++ b/src/you_get/extractors/qq.py
@ -3,6 +3,7 @@
 __all__ = ['qq_download']

 from ..common import *
+from .qie import download as qieDownload

 def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
    api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
@ -34,6 +35,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
        vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
        title = match1(content, r'title">([^"]+)</p>')
        title = title.strip() if title else vid
+    elif 'live.qq.com' in url:
+        qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
+        exit()
    elif 'iframe/player.html' in url:
        vid = match1(url, r'\bvid=(\w+)')
        # for embedded URLs; don't know what the title is
--- a/src/you_get/extractors/twitter.py
+++ b/src/you_get/extractors/twitter.py
@ -5,6 +5,13 @@ __all__ = ['twitter_download']
 from ..common import *
 from .vine import vine_download

+def extract_m3u(source):
+    """Resolve a playlist URL into the list of media URLs to download.
+
+    Fetches ``source``, follows the last ``/ext_tw_video/`` entry it lists
+    (NOTE(review): presumably the highest-bitrate variant -- confirm) and
+    returns the absolute video.twimg.com URLs listed by that second playlist.
+
+    When ``source`` lists no ``/ext_tw_video/`` entry, ``[source]`` is
+    returned so the caller downloads the URL directly, as it did before
+    playlist handling was added, instead of failing with IndexError.
+    """
+    entry = re.compile(r'(/ext_tw_video/.*)')
+    variants = entry.findall(get_content(source))
+    if not variants:
+        return [source]
+    segments = entry.findall(get_content('https://video.twimg.com%s' % variants[-1]))
+    return ['https://video.twimg.com%s' % i for i in segments]
+
 def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    screen_name = r1(r'data-screen-name="([^"]*)"', html) or \
@ -63,11 +70,13 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
            source = r1(r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>', vmap)
            if not item_id: page_title = i['tweet_id']

-        mime, ext, size = url_info(source)
+        urls = extract_m3u(source)
+        size = urls_size(urls)
+        mime, ext = 'video/mp4', 'mp4'

        print_info(site_info, page_title, mime, size)
        if not info_only:
-            download_urls([source], page_title, ext, size, output_dir, merge=merge)
+            download_urls(urls, page_title, ext, size, output_dir, merge=merge)

 site_info = "Twitter.com"
 download = twitter_download
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@ -169,7 +169,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):

        params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
        params.append(output + '.txt')
-        params += ['-c', 'copy', output]
+        params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]

        subprocess.check_call(params)
        os.remove(output + '.txt')
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@ -1,4 +1,4 @@
 #!/usr/bin/env python

 script_name = 'you-get'
-__version__ = '0.4.390'
+__version__ = '0.4.424'