From 677040a39ef448f8ccf3f4c8056642b6a9a9d585 Mon Sep 17 00:00:00 2001 From: NickeyKim Date: Thu, 19 May 2016 14:48:45 +0900 Subject: [PATCH 1/9] add Naver support add Naver support --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/naver.py | 48 ++++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 src/you_get/extractors/naver.py diff --git a/README.md b/README.md index a4f4fcd9..d0a91aa8 100644 --- a/README.md +++ b/README.md @@ -373,6 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 战旗TV | |✓| | | | 央视网 | |✓| | | | 花瓣 | | |✓| | +| Naver
네이버 | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. diff --git a/src/you_get/common.py b/src/you_get/common.py index 6c65bd49..9668df58 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -52,6 +52,7 @@ SITES = { 'mixcloud' : 'mixcloud', 'mtv81' : 'mtv81', 'musicplayon' : 'musicplayon', + 'naver' : 'naver', '7gogo' : 'nanagogo', 'nicovideo' : 'nicovideo', 'panda' : 'panda', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 1bb7a7ab..6fdaa340 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -45,6 +45,7 @@ from .mixcloud import * from .mtv81 import * from .musicplayon import * from .nanagogo import * +from .naver import * from .netease import * from .nicovideo import * from .panda import * diff --git a/src/you_get/extractors/naver.py b/src/you_get/extractors/naver.py new file mode 100644 index 00000000..7f7d5548 --- /dev/null +++ b/src/you_get/extractors/naver.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +__all__ = ['naver_download'] +import urllib.request, urllib.parse +from ..common import * + +def naver_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + + assert re.search(r'http://tvcast.naver.com/v/', url), "URL is not supported" + + html = get_html(url) + contentid = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',html) + videoid = contentid.group(1) + inkey = contentid.group(2) + assert videoid + assert inkey + info_key = urllib.parse.urlencode({'vid': videoid, 'inKey': inkey, }) + down_key = urllib.parse.urlencode({'masterVid': videoid,'protocol': 'p2p','inKey': inkey, }) + inf_xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?%s' % info_key ) + + from xml.dom.minidom import parseString + doc_info = parseString(inf_xml) + Subject = doc_info.getElementsByTagName('Subject')[0].firstChild + title = Subject.data + assert title + + xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?%s' % down_key ) + doc = parseString(xml) + + encodingoptions = doc.getElementsByTagName('EncodingOption') + old_height = doc.getElementsByTagName('height')[0] + real_url= '' + #to download the highest resolution one, + for node in encodingoptions: + new_height = node.getElementsByTagName('height')[0] + domain_node = node.getElementsByTagName('Domain')[0] + uri_node = node.getElementsByTagName('uri')[0] + if int(new_height.firstChild.data) > int (old_height.firstChild.data): + real_url= domain_node.firstChild.data+ '/' +uri_node.firstChild.data + + type, ext, size = url_info(real_url) + print_info(site_info, title, type, size) + if not info_only: + download_urls([real_url], title, ext, size, output_dir, merge = merge) + +site_info = "tvcast.naver.com" +download = naver_download +download_playlist = playlist_not_supported('naver') From e6af1c6265be5a45f978f3321db20cfe9cb30a05 Mon Sep 17 00:00:00 2001 From: jmargeta Date: Thu, 19 May 2016 22:57:48 +0200 Subject: [PATCH 2/9] Support for embedded Vimeo videos Example URL: - http://www.miracletutorials.com/howto-embed-vimeo/ --- src/you_get/extractors/embed.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index fd463c92..a177e663 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -8,6 +8,7 @@ from .netease import netease_download from .qq import qq_download_by_vid from .sina import sina_download_by_vid from .tudou import tudou_download_by_id +from .vimeo import vimeo_download_by_id from .yinyuetai import yinyuetai_download_by_id from .youku import youku_download_by_vid @@ -39,6 +40,9 @@ iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.sw netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] +vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ] + + def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): content = get_content(url, headers=fake_headers) found = False @@ -69,6 +73,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa found = True netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + urls = matchall(content, vimeo_embed_patters) + for url in urls: + found = True + vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + if not found: raise NotImplementedError(url) From 2cd4656b32c1005edebf3047ba7f1f7470abfbf0 Mon Sep 17 00:00:00 2001 From: David Zhuang Date: Fri, 20 May 2016 04:28:30 -0400 Subject: [PATCH 3/9] [QiE]Add Support --- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/qie.py | 78 ++++++++++++++++++++++++++++++ src/you_get/extractors/qq.py | 4 ++ 3 files changed, 83 insertions(+) create mode 100644 src/you_get/extractors/qie.py diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 1bb7a7ab..e2cf656c 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -52,6 +52,7 @@ from .pinterest import * from .pixnet import * from .pptv import * from .qianmo import * +from .qie import * from .qq import * from .sina import * from .sohu import * diff --git a/src/you_get/extractors/qie.py b/src/you_get/extractors/qie.py new file mode 100644 index 00000000..2288106a --- /dev/null +++ b/src/you_get/extractors/qie.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from ..common import * +from ..extractor import VideoExtractor + +from json import loads + +class QiE(VideoExtractor): + name = "QiE (企鹅直播)" + + # Last updated: 2015-11-24 + stream_types = [ + {'id': 'normal', 'container': 'flv', 'video_profile': '标清'}, + {'id': 'middle', 'container': 'flv', 'video_profile': '550'}, + {'id': 'middle2', 'container': 'flv', 'video_profile': '900'}, + ] + + id_dic = {i['video_profile']:(i['id']) for i in stream_types} + + api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}' + + @staticmethod + def get_vid_from_url(url): + """Extracts video ID from live.qq.com. + """ + html = get_content(url) + return match1(html, r'room_id\":(\d+)') + + def download_playlist_by_url(self, url, **kwargs): + pass + + def prepare(self, **kwargs): + if self.url: + self.vid = self.get_vid_from_url(self.url) + + content = get_content(self.api_endpoint.format(room_id = self.vid)) + content = loads(content) + self.title = content['data']['room_name'] + rtmp_url = content['data']['rtmp_url'] + #stream_avalable = [i['name'] for i in content['data']['stream']] + stream_available = {} + stream_available['normal'] = rtmp_url + '/' + content['data']['rtmp_live'] + if len(content['data']['rtmp_multi_bitrate']) > 0: + for k , v in content['data']['rtmp_multi_bitrate'].items(): + stream_available[k] = rtmp_url + '/' + v + + for s in self.stream_types: + if s['id'] in stream_available.keys(): + quality_id = s['id'] + url = stream_available[quality_id] + self.streams[quality_id] = { + 'container': 'flv', + 'video_profile': s['video_profile'], + 'size': 0, + 'url': url + } + + def extract(self, **kwargs): + for i in self.streams: + s = self.streams[i] + s['src'] = [s['url']] + if 'stream_id' in kwargs and kwargs['stream_id']: + # Extract the stream + stream_id = kwargs['stream_id'] + + if stream_id not in self.streams: + log.e('[Error] Invalid video format.') + log.e('Run \'-i\' command with no specific video format to view all available formats.') + exit(2) + else: + # Extract stream with the best quality + stream_id = self.streams_sorted[0]['id'] + s['src'] = [s['url']] + +site = QiE() +download = site.download_by_url +download_playlist = playlist_not_supported('QiE') \ No newline at end of file diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index ebe1a9ad..cb4aeebf 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -3,6 +3,7 @@ __all__ = ['qq_download'] from ..common import * +from .qie import download as qieDownload def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid @@ -34,6 +35,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') title = match1(content, r'title">([^"]+)

') title = title.strip() if title else vid + elif 'live.qq.com' in url: + qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only) + exit() elif 'iframe/player.html' in url: vid = match1(url, r'\bvid=(\w+)') # for embedded URLs; don't know what the title is From b4eb73965ccadd9ba78e9d65d1c05b93c9467979 Mon Sep 17 00:00:00 2001 From: cnbeining Date: Thu, 7 Apr 2016 16:43:17 -0400 Subject: [PATCH 4/9] Add -bsf:a aac_adtstoasc when merging As in #813. --- src/you_get/processor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 src/you_get/processor/ffmpeg.py diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py old mode 100644 new mode 100755 index 320eb642..e7ee35d6 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -169,7 +169,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] params.append(output + '.txt') - params += ['-c', 'copy', output] + params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] subprocess.check_call(params) os.remove(output + '.txt') From 67d18c766ae832864ad8188e71f981db24983025 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 22 May 2016 17:28:01 +0200 Subject: [PATCH 5/9] processor/ffmpeg.py: 644 --- src/you_get/processor/ffmpeg.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 src/you_get/processor/ffmpeg.py diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py old mode 100755 new mode 100644 From d1d62ae3040c98b7c3efb81937db211480ecb859 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 May 2016 17:28:29 +0200 Subject: [PATCH 6/9] [twitter] fix #1139 --- src/you_get/extractors/twitter.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 251cb8d5..1794081b 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -5,6 +5,13 @@ __all__ = ['twitter_download'] from ..common import * from .vine import vine_download +def extract_m3u(source): + r1 = get_content(source) + s1 = re.findall(r'(/ext_tw_video/.*)', r1) + r2 = get_content('https://video.twimg.com%s' % s1[-1]) + s2 = re.findall(r'(/ext_tw_video/.*)', r2) + return 'https://video.twimg.com%s' % s2[-1] + def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ @@ -63,6 +70,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) source = r1(r'\s*', vmap) if not item_id: page_title = i['tweet_id'] + source = extract_m3u(source) mime, ext, size = url_info(source) print_info(site_info, page_title, mime, size) From d2d6fcb95397b969272a53dc9abf852f1181916d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 May 2016 22:13:12 +0200 Subject: [PATCH 7/9] [twitter] fix #1139 (really) --- src/you_get/extractors/twitter.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 1794081b..b0c89189 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -10,7 +10,7 @@ def extract_m3u(source): s1 = re.findall(r'(/ext_tw_video/.*)', r1) r2 = get_content('https://video.twimg.com%s' % s1[-1]) s2 = re.findall(r'(/ext_tw_video/.*)', r2) - return 'https://video.twimg.com%s' % s2[-1] + return ['https://video.twimg.com%s' % i for i in s2] def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) @@ -70,12 +70,13 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) source = r1(r'\s*', vmap) if not item_id: page_title = i['tweet_id'] - source = extract_m3u(source) - mime, ext, size = url_info(source) + urls = extract_m3u(source) + size = urls_size(urls) + mime, ext = 'video/mp4', 'mp4' print_info(site_info, page_title, mime, size) if not info_only: - download_urls([source], page_title, ext, size, output_dir, merge=merge) + download_urls(urls, page_title, ext, size, output_dir, merge=merge) site_info = "Twitter.com" download = twitter_download From 186762b4b51679d1af40139e782e2b722a52c633 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 May 2016 22:26:36 +0200 Subject: [PATCH 8/9] update README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d0a91aa8..60cb125a 100644 --- a/README.md +++ b/README.md @@ -360,6 +360,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | PPTV聚力 | |✓| | | | 齐鲁网 | |✓| | | | QQ
腾讯视频 | |✓| | | +| 企鹅直播 | |✓| | | | 阡陌视频 | |✓| | | | THVideo | |✓| | | | Sina
新浪视频
微博秒拍视频 |
|✓| | | @@ -374,6 +375,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 央视网 | |✓| | | | 花瓣 | | |✓| | | Naver
네이버 | |✓| | | +| 芒果TV | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. From 5f99e1dfd3ca5d6c83b11e5c060a8c0bfe611d8d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 May 2016 22:38:19 +0200 Subject: [PATCH 9/9] version 0.4.424 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 0e7b6632..5a9d5581 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.390' +__version__ = '0.4.424'