diff --git a/README.md b/README.md index a4f4fcd9..60cb125a 100644 --- a/README.md +++ b/README.md @@ -360,6 +360,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | PPTV聚力 | |✓| | | | 齐鲁网 | |✓| | | | QQ
腾讯视频 | |✓| | | +| 企鹅直播 | |✓| | | | 阡陌视频 | |✓| | | | THVideo | |✓| | | | Sina
新浪视频
微博秒拍视频 |
|✓| | | @@ -373,6 +374,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 战旗TV | |✓| | | | 央视网 | |✓| | | | 花瓣 | | |✓| | +| Naver
네이버 | |✓| | | +| 芒果TV | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. diff --git a/src/you_get/common.py b/src/you_get/common.py index 6c65bd49..9668df58 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -52,6 +52,7 @@ SITES = { 'mixcloud' : 'mixcloud', 'mtv81' : 'mtv81', 'musicplayon' : 'musicplayon', + 'naver' : 'naver', '7gogo' : 'nanagogo', 'nicovideo' : 'nicovideo', 'panda' : 'panda', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 1bb7a7ab..20a7f7cf 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -45,6 +45,7 @@ from .mixcloud import * from .mtv81 import * from .musicplayon import * from .nanagogo import * +from .naver import * from .netease import * from .nicovideo import * from .panda import * @@ -52,6 +53,7 @@ from .pinterest import * from .pixnet import * from .pptv import * from .qianmo import * +from .qie import * from .qq import * from .sina import * from .sohu import * diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index fd463c92..a177e663 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -8,6 +8,7 @@ from .netease import netease_download from .qq import qq_download_by_vid from .sina import sina_download_by_vid from .tudou import tudou_download_by_id +from .vimeo import vimeo_download_by_id from .yinyuetai import yinyuetai_download_by_id from .youku import youku_download_by_vid @@ -39,6 +40,9 @@ iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.sw netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] +vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ] + + def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): content = get_content(url, headers=fake_headers) found = False @@ -69,6 +73,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa found = True netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + urls = matchall(content, vimeo_embed_patters) + for url in urls: + found = True + vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + if not found: raise NotImplementedError(url) diff --git a/src/you_get/extractors/naver.py b/src/you_get/extractors/naver.py new file mode 100644 index 00000000..7f7d5548 --- /dev/null +++ b/src/you_get/extractors/naver.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +__all__ = ['naver_download'] +import urllib.request, urllib.parse +from ..common import * + +def naver_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + + assert re.search(r'http://tvcast.naver.com/v/', url), "URL is not supported" + + html = get_html(url) + contentid = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',html) + videoid = contentid.group(1) + inkey = contentid.group(2) + assert videoid + assert inkey + info_key = urllib.parse.urlencode({'vid': videoid, 'inKey': inkey, }) + down_key = urllib.parse.urlencode({'masterVid': videoid,'protocol': 'p2p','inKey': inkey, }) + inf_xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?%s' % info_key ) + + from xml.dom.minidom import parseString + doc_info = parseString(inf_xml) + Subject = doc_info.getElementsByTagName('Subject')[0].firstChild + title = Subject.data + assert title + + xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?%s' % down_key ) + doc = parseString(xml) + + encodingoptions = doc.getElementsByTagName('EncodingOption') + old_height = doc.getElementsByTagName('height')[0] + real_url= '' + #to download the highest resolution one, + for node in encodingoptions: + new_height = node.getElementsByTagName('height')[0] + domain_node = node.getElementsByTagName('Domain')[0] + uri_node = node.getElementsByTagName('uri')[0] + if int(new_height.firstChild.data) > int (old_height.firstChild.data): + real_url= domain_node.firstChild.data+ '/' +uri_node.firstChild.data + + type, ext, size = url_info(real_url) + print_info(site_info, title, type, size) + if not info_only: + download_urls([real_url], title, ext, size, output_dir, merge = merge) + +site_info = "tvcast.naver.com" +download = naver_download +download_playlist = playlist_not_supported('naver') diff --git a/src/you_get/extractors/qie.py b/src/you_get/extractors/qie.py new file mode 100644 index 00000000..2288106a --- /dev/null +++ b/src/you_get/extractors/qie.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from ..common import * +from ..extractor import VideoExtractor + +from json import loads + +class QiE(VideoExtractor): + name = "QiE (企鹅直播)" + + # Last updated: 2015-11-24 + stream_types = [ + {'id': 'normal', 'container': 'flv', 'video_profile': '标清'}, + {'id': 'middle', 'container': 'flv', 'video_profile': '550'}, + {'id': 'middle2', 'container': 'flv', 'video_profile': '900'}, + ] + + id_dic = {i['video_profile']:(i['id']) for i in stream_types} + + api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}' + + @staticmethod + def get_vid_from_url(url): + """Extracts video ID from live.qq.com. + """ + html = get_content(url) + return match1(html, r'room_id\":(\d+)') + + def download_playlist_by_url(self, url, **kwargs): + pass + + def prepare(self, **kwargs): + if self.url: + self.vid = self.get_vid_from_url(self.url) + + content = get_content(self.api_endpoint.format(room_id = self.vid)) + content = loads(content) + self.title = content['data']['room_name'] + rtmp_url = content['data']['rtmp_url'] + #stream_avalable = [i['name'] for i in content['data']['stream']] + stream_available = {} + stream_available['normal'] = rtmp_url + '/' + content['data']['rtmp_live'] + if len(content['data']['rtmp_multi_bitrate']) > 0: + for k , v in content['data']['rtmp_multi_bitrate'].items(): + stream_available[k] = rtmp_url + '/' + v + + for s in self.stream_types: + if s['id'] in stream_available.keys(): + quality_id = s['id'] + url = stream_available[quality_id] + self.streams[quality_id] = { + 'container': 'flv', + 'video_profile': s['video_profile'], + 'size': 0, + 'url': url + } + + def extract(self, **kwargs): + for i in self.streams: + s = self.streams[i] + s['src'] = [s['url']] + if 'stream_id' in kwargs and kwargs['stream_id']: + # Extract the stream + stream_id = kwargs['stream_id'] + + if stream_id not in self.streams: + log.e('[Error] Invalid video format.') + log.e('Run \'-i\' command with no specific video format to view all available formats.') + exit(2) + else: + # Extract stream with the best quality + stream_id = self.streams_sorted[0]['id'] + s['src'] = [s['url']] + +site = QiE() +download = site.download_by_url +download_playlist = playlist_not_supported('QiE') \ No newline at end of file diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index ebe1a9ad..cb4aeebf 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -3,6 +3,7 @@ __all__ = ['qq_download'] from ..common import * +from .qie import download as qieDownload def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid @@ -34,6 +35,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') title = match1(content, r'title">([^"]+)

') title = title.strip() if title else vid + elif 'live.qq.com' in url: + qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only) + exit() elif 'iframe/player.html' in url: vid = match1(url, r'\bvid=(\w+)') # for embedded URLs; don't know what the title is diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 251cb8d5..b0c89189 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -5,6 +5,13 @@ __all__ = ['twitter_download'] from ..common import * from .vine import vine_download +def extract_m3u(source): + r1 = get_content(source) + s1 = re.findall(r'(/ext_tw_video/.*)', r1) + r2 = get_content('https://video.twimg.com%s' % s1[-1]) + s2 = re.findall(r'(/ext_tw_video/.*)', r2) + return ['https://video.twimg.com%s' % i for i in s2] + def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ @@ -63,11 +70,13 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) source = r1(r'\s*', vmap) if not item_id: page_title = i['tweet_id'] - mime, ext, size = url_info(source) + urls = extract_m3u(source) + size = urls_size(urls) + mime, ext = 'video/mp4', 'mp4' print_info(site_info, page_title, mime, size) if not info_only: - download_urls([source], page_title, ext, size, output_dir, merge=merge) + download_urls(urls, page_title, ext, size, output_dir, merge=merge) site_info = "Twitter.com" download = twitter_download diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 320eb642..e7ee35d6 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -169,7 +169,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] params.append(output + '.txt') - params += ['-c', 'copy', output] + params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] subprocess.check_call(params) os.remove(output + '.txt') diff --git a/src/you_get/version.py b/src/you_get/version.py index 0e7b6632..5a9d5581 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.390' +__version__ = '0.4.424'