From 93fc74db2af9a6af02b574090e84c1c402099f50 Mon Sep 17 00:00:00 2001 From: cage <120989324@qq.com> Date: Mon, 28 Oct 2019 01:39:55 +0800 Subject: [PATCH 01/51] Update bilibili.py download multipart video form given P number --- src/you_get/extractors/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index e5ddbafc..d0bbace3 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -624,7 +624,8 @@ class Bilibili(VideoExtractor): html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16')) playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)') diff --git a/tests/test.py b/tests/test.py index e2f77a79..16741722 100644 --- a/tests/test.py +++ b/tests/test.py @@ -6,7 +6,8 @@ from you_get.extractors import ( imgur, magisto, youtube, - missevan + missevan, + acfun ) @@ -38,6 +39,8 @@ class YouGetTests(unittest.TestCase): info_only=True ) + def test_acfun(self): + acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) if __name__ == '__main__': unittest.main() From 867b66effc3554273efcb3e756ae5e388a1caa5c Mon Sep 17 00:00:00 2001 From: SFMDI <36741818+SFMDI@users.noreply.github.com> Date: Sat, 23 Nov 2019 03:05:21 +0900 Subject: [PATCH 06/51] fix extractor naver.py can download both old and recent videos without wrong result --- src/you_get/extractors/naver.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/naver.py b/src/you_get/extractors/naver.py index add884e9..42a607e4 100644 --- a/src/you_get/extractors/naver.py +++ b/src/you_get/extractors/naver.py @@ -16,15 +16,8 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' page = get_content(url) try: - temp = re.search(r"", page) - if temp is not None: - og_video_url = temp.group(1) - params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query) - vid = params_dict['vid'][0] - key = params_dict['outKey'][0] - else: - vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1) - key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1) + vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1) + key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1) meta_str = get_content(ep.format(vid, key)) meta_json = json.loads(meta_str) if 'errorCode' in meta_json: @@ -38,7 +31,7 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw size = url_size(video_url) print_info(site_info, title, 'mp4', size) if not info_only: - download_urls([video_url], title, 'mp4', size, **kwargs) + download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) except: universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) From 6c4fbd0651ad0b002864cf5f33ed3b0f28d59a53 Mon Sep 17 00:00:00 2001 From: SFMDI <36741818+SFMDI@users.noreply.github.com> Date: Sat, 23 Nov 2019 03:53:32 +0900 Subject: [PATCH 07/51] add tv.kakao.com extractor can download video from tv.kakao.com and sort __init__ by extarctor name --- src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 7 +++-- src/you_get/extractors/kakao.py | 50 ++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 src/you_get/extractors/kakao.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 2397a0a6..19474a75 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -66,6 +66,7 @@ SITES = { 'iwara' : 'iwara', 'joy' : 'joy', 'kankanews' : 'bilibili', + 'kakao' : 'kakao', 'khanacademy' : 'khan', 'ku6' : 'ku6', 'kuaishou' : 'kuaishou', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 2961f015..5ed5264b 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -33,7 +33,10 @@ from .interest import * from .iqilu import * from .iqiyi import * from .joy import * +from .khan import * from .ku6 import * +from .kakao import * +from .kuaishou import * from .kugou import * from .kuwo import * from .le import * @@ -62,6 +65,7 @@ from .sina import * from .sohu import * from .soundcloud import * from .suntv import * +from .ted import * from .theplatform import * from .tiktok import * from .tucao import * @@ -81,9 +85,6 @@ from .yinyuetai import * from .yixia import * from .youku import * from .youtube import * -from .ted import * -from .khan import * from .zhanqi import * -from .kuaishou import * from .zhibo import * from .zhihu import * diff --git a/src/you_get/extractors/kakao.py b/src/you_get/extractors/kakao.py new file mode 100644 index 00000000..4ec282e3 --- /dev/null +++ b/src/you_get/extractors/kakao.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +from ..common import * +from .universal import * + +__all__ = ['kakao_download'] + + +def kakao_download(url, output_dir='.', info_only=False, **kwargs): + json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}' + + # in this implementation playlist not supported so use url_without_playlist + # if want to support playlist need to change that + if re.search('playlistId', url): + url = re.search(r"(.+)\?.+?", url).group(1) + + page = get_content(url) + try: + vid = re.search(r"", page).group(1) + title = re.search(r"", page).group(1) + + meta_str = get_content(json_request_url.format(vid)) + meta_json = json.loads(meta_str) + + standard_preset = meta_json['output_list']['standard_preset'] + output_videos = meta_json['output_list']['output_list'] + size = '' + if meta_json['svcname'] == 'smr_pip': + for v in output_videos: + if v['preset'] == 'mp4_PIP_SMR_480P': + size = int(v['filesize']) + break + else: + for v in output_videos: + if v['preset'] == standard_preset: + size = int(v['filesize']) + break + + video_url = meta_json['location']['url'] + + print_info(site_info, title, 'mp4', size) + if not info_only: + download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) + except: + universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs) + + +site_info = "tv.kakao.com" +download = kakao_download +download_playlist = playlist_not_supported('kakao') From e9d5cc0232df0e1593d60aed7ae256c048178e0f Mon Sep 17 00:00:00 2001 From: Jerry Date: Sun, 1 Dec 2019 23:49:34 +0800 Subject: [PATCH 08/51] Fix playback of VideoExtractor.dash_streams --- src/you_get/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 2397a0a6..65063647 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -272,15 +272,21 @@ def matchall(text, patterns): def launch_player(player, urls): import subprocess import shlex + urls = list(urls) + for url in urls.copy(): + if type(url) is list: + urls.extend(url) + urls = [url for url in urls if type(url) is str] + assert urls if (sys.version_info >= (3, 3)): import shutil exefile=shlex.split(player)[0] if shutil.which(exefile) is not None: - subprocess.call(shlex.split(player) + list(urls)) + subprocess.call(shlex.split(player) + urls) else: log.wtf('[Failed] Cannot find player "%s"' % exefile) else: - subprocess.call(shlex.split(player) + list(urls)) + subprocess.call(shlex.split(player) + urls) def parse_query_param(url, param): From 44698a0f39d6b6c5e5fd1e5a8efffe8de3278519 Mon Sep 17 00:00:00 2001 From: out001a <545827465@qq.com> Date: Sun, 15 Dec 2019 12:29:20 +0800 Subject: [PATCH 09/51] [ixigua] fix error 'video_id not found' --- src/you_get/extractors/ixigua.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 20e45616..151107a6 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -5,6 +5,8 @@ import binascii from ..common import * import random +import requests +import string import ctypes from json import loads @@ -80,7 +82,23 @@ def get_video_url_from_video_id(video_id): def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422 - html = get_html(url, faker=True) + sess = requests.session() + html = sess.get(url, headers=headers).text + conf = loads(match1(html, r"window\.config = (.+);")) + if not conf: + log.e("Get window.config from url failed, url: {}".format(url)) + return + verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \ + + '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31)) + try: + ok = get_content(verify_url) + except Exception as e: + ok = e.msg + if ok != 'OK': + log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok)) + return + html = sess.get(url, headers=headers).text + video_id = match1(html, r"\"vid\":\"([^\"]+)") title = match1(html, r"\"player__videoTitle\">.*?(.*)<\/h1><\/div>") if not video_id: From a54a9b36260f84bcd6f7fda3017a54dc9b5330fd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 25 Dec 2019 02:47:29 +0100 Subject: [PATCH 10/51] [baidu] fix tiebapic --- src/you_get/extractors/baidu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 7914667e..77e666b3 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -140,8 +140,8 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= output_dir=output_dir, merge=False) items = re.findall( - r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) - urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i + r'//tiebapic.baidu.com/forum/w[^"]+/([^/"]+)', html) + urls = ['http://tiebapic.baidu.com/forum/pic/item/' + i for i in set(items)] # handle albums @@ -151,7 +151,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= album_info = json.loads(get_content(album_url)) for i in album_info['data']['pic_list']: urls.append( - 'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg') + 'http://tiebapic.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg') ext = 'jpg' size = float('Inf') From 654371e851d0c8b2f403ee81d29e41d3bed52e2b Mon Sep 17 00:00:00 2001 From: chonpsk Date: Fri, 27 Dec 2019 22:55:51 +0800 Subject: [PATCH 11/51] fix issue about KeyError: 'url_encoded_fmt_stream_map' --- src/you_get/extractors/youtube.py | 82 ++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 4483f8eb..3606a3c6 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -220,7 +220,10 @@ class YouTube(VideoExtractor): stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') #stream_list = ytplayer_config['args']['adaptive_fmts'].split(',') except: - stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',') + if 'url_encoded_fmt_stream_map' not in video_info: + stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats'] + else: + stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',') if re.search('([^"]*/base\.js)"', video_page): self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1) else: @@ -302,19 +305,35 @@ class YouTube(VideoExtractor): exit(0) for stream in stream_list: - metadata = parse.parse_qs(stream) - stream_itag = metadata['itag'][0] - self.streams[stream_itag] = { - 'itag': metadata['itag'][0], - 'url': metadata['url'][0], - 'sig': metadata['sig'][0] if 'sig' in metadata else None, - 's': metadata['s'][0] if 's' in metadata else None, - 'quality': metadata['quality'][0] if 'quality' in metadata else None, - #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None, - 'type': metadata['type'][0], - 'mime': metadata['type'][0].split(';')[0], - 'container': mime_to_container(metadata['type'][0].split(';')[0]), - } + if isinstance(stream, str): + metadata = parse.parse_qs(stream) + stream_itag = metadata['itag'][0] + self.streams[stream_itag] = { + 'itag': metadata['itag'][0], + 'url': metadata['url'][0], + 'sig': metadata['sig'][0] if 'sig' in metadata else None, + 's': metadata['s'][0] if 's' in metadata else None, + 'quality': metadata['quality'][0] if 'quality' in metadata else None, + #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None, + 'type': metadata['type'][0], + 'mime': metadata['type'][0].split(';')[0], + 'container': mime_to_container(metadata['type'][0].split(';')[0]), + } + else: + stream_itag = stream['itag'] + self.streams[stream_itag] = { + 'itag': stream['itag'], + 'url': stream['url'] if 'url' in stream else None, + 'sig': None, + 's': None, + 'quality': stream['quality'], + 'type': stream['mimeType'], + 'mime': stream['mimeType'].split(';')[0], + 'container': mime_to_container(stream['mimeType'].split(';')[0]), + } + if 'cipher' in stream: + self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1])) + for _ in stream['cipher'].split('&')])) # Prepare caption tracks try: @@ -425,10 +444,37 @@ class YouTube(VideoExtractor): for i in afmt.split('&')]) for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')] except: - streams = [dict([(i.split('=')[0], - parse.unquote(i.split('=')[1])) - for i in afmt.split('&')]) - for afmt in video_info['adaptive_fmts'][0].split(',')] + if 'adaptive_fmts' in video_info: + streams = [dict([(i.split('=')[0], + parse.unquote(i.split('=')[1])) + for i in afmt.split('&')]) + for afmt in video_info['adaptive_fmts'][0].split(',')] + else: + streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] + for stream in streams: + if 'qualityLabel' in stream: + stream['quality_label'] = stream['qualityLabel'] + del stream['qualityLabel'] + if 'width' in stream: + stream['size'] = '{}x{}'.format(stream['width'], stream['height']) + del stream['width'] + del stream['height'] + stream['type'] = stream['mimeType'] + stream['clen'] = stream['contentLength'] + stream['init'] = '{}-{}'.format( + stream['initRange']['start'], + stream['initRange']['end']) + stream['index'] = '{}-{}'.format( + stream['indexRange']['start'], + stream['indexRange']['end']) + del stream['mimeType'] + del stream['contentLength'] + del stream['initRange'] + del stream['indexRange'] + if 'cipher' in stream: + stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1])) + for _ in stream['cipher'].split('&')])) + del stream['cipher'] for stream in streams: # get over speed limiting stream['url'] += '&ratebypass=yes' From ab4bce79f3641a651f5ad2953e05edbe17df958f Mon Sep 17 00:00:00 2001 From: chonpsk Date: Fri, 27 Dec 2019 23:25:59 +0800 Subject: [PATCH 12/51] fix issue on itag --- src/you_get/extractors/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 3606a3c6..ebb42c69 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -322,7 +322,7 @@ class YouTube(VideoExtractor): else: stream_itag = stream['itag'] self.streams[stream_itag] = { - 'itag': stream['itag'], + 'itag': str(stream['itag']), 'url': stream['url'] if 'url' in stream else None, 'sig': None, 's': None, @@ -452,6 +452,7 @@ class YouTube(VideoExtractor): else: streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] for stream in streams: + stream['itag'] = str(stream['itag']) if 'qualityLabel' in stream: stream['quality_label'] = stream['qualityLabel'] del stream['qualityLabel'] From a934dea8c5d9e91b62f0a7b91a5da9a201f72982 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 28 Dec 2019 21:35:25 +0100 Subject: [PATCH 13/51] version 0.4.1388 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 1d87177c..235b8f85 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1355' +__version__ = '0.4.1388' From 767339915b44172dcfb3a394feed4af169f739fb Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 6 Jan 2020 18:25:43 +0100 Subject: [PATCH 14/51] [tests] remove one test_imgur case since it fails too often --- tests/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test.py b/tests/test.py index 16741722..220b2169 100644 --- a/tests/test.py +++ b/tests/test.py @@ -14,7 +14,6 @@ from you_get.extractors import ( class YouGetTests(unittest.TestCase): def test_imgur(self): imgur.download('http://imgur.com/WVLk5nD', info_only=True) - imgur.download('http://imgur.com/gallery/WVLk5nD', info_only=True) def test_magisto(self): magisto.download( @@ -40,7 +39,7 @@ class YouGetTests(unittest.TestCase): ) def test_acfun(self): - acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) + acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) if __name__ == '__main__': unittest.main() From b96acaa526f61667518ad0aac233a50eed9b38f4 Mon Sep 17 00:00:00 2001 From: laiqing Date: Wed, 8 Jan 2020 14:05:57 +0800 Subject: [PATCH 15/51] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=90=9C=E7=8B=90?= =?UTF-8?q?=E5=8F=B7=E7=9A=84=E8=A7=86=E9=A2=91=E4=B8=8D=E8=83=BD=E4=B8=8B?= =?UTF-8?q?=E8=BD=BD=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index a1afc126..1aedb3e6 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -26,7 +26,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_pr vid = r1('id=(\d+)', url) else: html = get_html(url) - vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) + vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) assert vid if extractor_proxy: From 67c240abd043e5effd92f955d420f18e25c76dc2 Mon Sep 17 00:00:00 2001 From: shanhm Date: Wed, 8 Jan 2020 15:20:27 +0800 Subject: [PATCH 16/51] use urllib instead of requests --- src/you_get/extractors/ixigua.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 151107a6..2f11e7f9 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -5,10 +5,10 @@ import binascii from ..common import * import random -import requests import string import ctypes from json import loads +from urllib import request __all__ = ['ixigua_download', 'ixigua_download_playlist_by_url'] @@ -82,8 +82,14 @@ def get_video_url_from_video_id(video_id): def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422 - sess = requests.session() - html = sess.get(url, headers=headers).text + resp = urlopen_with_retry(request.Request(url)) + html = resp.read().decode('utf-8') + + _cookies = [] + for c in resp.getheader('Set-Cookie').split("httponly,"): + _cookies.append(c.strip().split(' ')[0]) + headers['cookie'] = ' '.join(_cookies) + conf = loads(match1(html, r"window\.config = (.+);")) if not conf: log.e("Get window.config from url failed, url: {}".format(url)) @@ -97,7 +103,7 @@ def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if ok != 'OK': log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok)) return - html = sess.get(url, headers=headers).text + html = get_content(url, headers=headers) video_id = match1(html, r"\"vid\":\"([^\"]+)") title = match1(html, r"\"player__videoTitle\">.*?(.*)<\/h1><\/div>") From 5943fb6ca34371c3a87219c7c67b5eb139b34980 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Jan 2020 17:36:16 +0100 Subject: [PATCH 17/51] [sohu] fix bid --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 1aedb3e6..74374202 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -26,7 +26,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_pr vid = r1('id=(\d+)', url) else: html = get_html(url) - vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) + vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html) assert vid if extractor_proxy: From c3ae61c04e8235b444caedcd25064fa5af4f4c92 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 13 Jan 2020 22:16:33 +0100 Subject: [PATCH 18/51] [youtube] remove streams without contentLength (fix #2767) --- src/you_get/extractors/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index ebb42c69..07c1382e 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -222,7 +222,7 @@ class YouTube(VideoExtractor): except: if 'url_encoded_fmt_stream_map' not in video_info: stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats'] - else: + else: stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',') if re.search('([^"]*/base\.js)"', video_page): self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1) @@ -451,6 +451,8 @@ class YouTube(VideoExtractor): for afmt in video_info['adaptive_fmts'][0].split(',')] else: streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] + # streams without contentLength got broken urls, just remove them (#2767) + streams = [stream for stream in streams if 'contentLength' in stream] for stream in streams: stream['itag'] = str(stream['itag']) if 'qualityLabel' in stream: From 5a008ad878fa2676ee4fa55a020f09acc7e5f66e Mon Sep 17 00:00:00 2001 From: Jarry Shaw Date: Sat, 25 Jan 2020 12:11:41 +0800 Subject: [PATCH 19/51] Added PySocks extra requirement --- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 21246c5f..24dc9fb2 100755 --- a/setup.py +++ b/setup.py @@ -41,5 +41,9 @@ setup( classifiers = proj_info['classifiers'], - entry_points = {'console_scripts': proj_info['console_scripts']} + entry_points = {'console_scripts': proj_info['console_scripts']}, + + extras_require={ + 'socks': ['PySocks'], + } ) From ccdc58a82d17a0eebe81d4d337353b81d9b6cb68 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 27 Jan 2020 00:19:51 +0100 Subject: [PATCH 20/51] [youtube] new pattern for function name --- src/you_get/extractors/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 07c1382e..b8ca4280 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -94,7 +94,8 @@ class YouTube(VideoExtractor): f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \ match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \ match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \ - match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') + match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') or \ + match1(js, r'=([$\w]+)\(decodeURIComponent\(') f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \ match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1)) f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def) From 610c3e8942c9f7b5dad2b9342d869b4693a72ceb Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 27 Jan 2020 00:27:15 +0100 Subject: [PATCH 21/51] version 0.4.1403 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 235b8f85..c124a979 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1388' +__version__ = '0.4.1403' From 771a89fc8568007b870c34834ed00f48038a0d2d Mon Sep 17 00:00:00 2001 From: Ok Date: Wed, 29 Jan 2020 01:25:21 +0200 Subject: [PATCH 22/51] fixed regex --- src/you_get/extractors/coub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/coub.py b/src/you_get/extractors/coub.py index 36a0a5d6..a71cbc18 100644 --- a/src/you_get/extractors/coub.py +++ b/src/you_get/extractors/coub.py @@ -79,7 +79,7 @@ def get_title_and_urls(json_data): def get_coub_data(html): - coub_data = r1(r'', html) + coub_data = r1(r'))', html) json_data = json.loads(coub_data) return json_data From 5147481a89ea752913914ddd60366b8143b2a06c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 31 Jan 2020 15:11:58 +0100 Subject: [PATCH 23/51] [json_output] remove sort_keys in json.dumps call (fix #2773) --- src/you_get/json_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py index 5971bd93..c6195761 100644 --- a/src/you_get/json_output.py +++ b/src/you_get/json_output.py @@ -29,7 +29,7 @@ def output(video_extractor, pretty_print=True): if extra: out["extra"] = extra if pretty_print: - print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)) + print(json.dumps(out, indent=4, ensure_ascii=False)) else: print(json.dumps(out)) From cd9931e1f6b56f28dcb7202051bc9c6520b6e5ff Mon Sep 17 00:00:00 2001 From: nsb2006 Date: Mon, 10 Feb 2020 23:29:09 +0800 Subject: [PATCH 24/51] =?UTF-8?q?=E8=AE=BE=E6=83=B3=E4=BC=AA=E8=A3=85?= =?UTF-8?q?=E8=85=BE=E8=AE=AF=E8=A7=86=E9=A2=91=E5=AE=A2=E6=88=B7=E7=AB=AF?= =?UTF-8?q?=E4=B8=8B=E8=BD=BD1080P?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 尝试通过修改UA下载1080p,但是之前没接触过Python,依葫芦画瓢改了半天重新编译后发现UA还是默认的Python-urllib。 或者有没有UA的命令参数,试了-user-agent报错。 祝新春吉祥,百毒不侵,感谢。 --- src/you_get/extractors/qq.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 232a08b4..6411b195 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -6,6 +6,10 @@ from .qie import download as qieDownload from .qie_video import download_by_url as qie_video_download from ..common import * +headers = { + 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400' +} + def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): @@ -14,7 +18,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): platforms = [4100201, 11] for platform in platforms: info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&defn=shd&vid={}'.format(platform, vid) - info = get_content(info_api) + info = get_content(info_api, headers) video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) if not video_json.get('msg')=='cannot play outside': break @@ -41,7 +45,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): filename = '.'.join([fn_pre, magic_str, str(part), video_type]) key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename) - part_info = get_content(key_api) + part_info = get_content(key_api, headers) key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1]) if key_json.get('key') is None: vkey = video_json['vl']['vi'][0]['fvkey'] @@ -71,7 +75,7 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption= BASE_URL = 'http://cgi.kg.qq.com/fcgi-bin/kg_ugc_getdetail' params_str = '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&v=4&outCharset=utf-8&shareid=' + shareid url = BASE_URL + params_str - content = get_content(url) + content = get_content(url, headers) json_str = content[len('jsonpcallback('):-1] json_data = json.loads(json_str) @@ -127,7 +131,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): return if 'mp.weixin.qq.com/s' in url: - content = get_content(url) + content = get_content(url, headers) vids = matchall(content, [r'[?;]vid=(\w+)']) for vid in vids: qq_download_by_vid(vid, vid, output_dir, merge, info_only) @@ -142,7 +146,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): title=info_json['videoinfo']['title'] elif 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): # http://daxue.qq.com/content/content/id/2321 - content = get_content(url) + content = get_content(url, headers) vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') title = match1(content, r'title">([^"]+)

') title = title.strip() if title else vid @@ -152,11 +156,11 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): title = vid elif 'view.inews.qq.com' in url: # view.inews.qq.com/a/20180521V0Z9MH00 - content = get_content(url) + content = get_content(url, headers) vid = match1(content, r'"vid":"(\w+)"') title = match1(content, r'"title":"(\w+)"') else: - content = get_content(url) + content = get_content(url, headers) #vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5 rurl = match1(content, r'') #https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html vid = "" From bf49e2d1b398d4901243115746b9fd14a71aceda Mon Sep 17 00:00:00 2001 From: flewsea Date: Thu, 27 Feb 2020 22:38:31 +0800 Subject: [PATCH 25/51] =?UTF-8?q?=E6=94=AF=E6=8C=81=E4=B8=8B=E8=BD=BD?= =?UTF-8?q?=E9=A1=B5=E9=9D=A2=E5=86=85=E6=89=80=E6=9C=89=E8=A7=86=E9=A2=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/extractors/iwara.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/iwara.py b/src/you_get/extractors/iwara.py index a30159d7..67a41d41 100644 --- a/src/you_get/extractors/iwara.py +++ b/src/you_get/extractors/iwara.py @@ -9,12 +9,15 @@ headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Cache-Control': 'max-age=0', - 'Connection': 'keep-alive', 'Save-Data': 'on', 'Cookie':'has_js=1;show_adult=1', } - +stream_types = [ + {'id': 'Source', 'container': 'mp4', 'video_profile': '原始'}, + {'id': '540p', 'container': 'mp4', 'video_profile': '540p'}, + {'id': '360p', 'container': 'mp4', 'video_profile': '360P'}, + ] def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs): global headers video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)') @@ -31,6 +34,17 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if not info_only: download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers) +def download_playlist_by_url( url, **kwargs): + video_page = get_content(url) + # url_first=re.findall(r"(http[s]?://[^/]+)",url) + url_first=match1(url, r"(http[s]?://[^/]+)") + # print (url_first) + videos = set(re.findall(r'0): + for video in videos: + iwara_download(url_first+video, **kwargs) + else: + maybe_print('this page not found any videos') site_info = "Iwara" download = iwara_download -download_playlist = playlist_not_supported('iwara') +download_playlist = download_playlist_by_url From 358d79778122c391d83b2eaed5c139be2f798e7f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 2 Mar 2020 14:27:30 +0100 Subject: [PATCH 26/51] [youtube] fix download for non-DASH streams --- src/you_get/extractors/youtube.py | 14 ++++++++++---- tests/test.py | 3 +++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index b8ca4280..38aa1a4e 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -218,7 +218,10 @@ class YouTube(VideoExtractor): ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] # Workaround: get_video_info returns bad s. Why? - stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') + if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']: + stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats'] + else: + stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') #stream_list = ytplayer_config['args']['adaptive_fmts'].split(',') except: if 'url_encoded_fmt_stream_map' not in video_info: @@ -321,7 +324,7 @@ class YouTube(VideoExtractor): 'container': mime_to_container(metadata['type'][0].split(';')[0]), } else: - stream_itag = stream['itag'] + stream_itag = str(stream['itag']) self.streams[stream_itag] = { 'itag': str(stream['itag']), 'url': stream['url'] if 'url' in stream else None, @@ -367,7 +370,7 @@ class YouTube(VideoExtractor): self.caption_tracks[lang] = srt except: pass - # Prepare DASH streams + # Prepare DASH streams (NOTE: not every video has DASH streams!) try: dashmpd = ytplayer_config['args']['dashmpd'] dash_xml = parseString(get_content(dashmpd)) @@ -451,7 +454,10 @@ class YouTube(VideoExtractor): for i in afmt.split('&')]) for afmt in video_info['adaptive_fmts'][0].split(',')] else: - streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] + try: + streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] + except: # no DASH stream at all + return # streams without contentLength got broken urls, just remove them (#2767) streams = [stream for stream in streams if 'contentLength' in stream] for stream in streams: diff --git a/tests/test.py b/tests/test.py index 220b2169..7187cfb0 100644 --- a/tests/test.py +++ b/tests/test.py @@ -37,6 +37,9 @@ class YouGetTests(unittest.TestCase): 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa info_only=True ) + youtube.download( + 'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True + ) def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) From 8b7566eeb3f82112ac9996619164503c8cd8f309 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 2 Mar 2020 14:45:57 +0100 Subject: [PATCH 27/51] version 0.4.1410 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index c124a979..c867e5b8 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1403' +__version__ = '0.4.1410' From 11f78325e637cff01aad6e52d13f757052511965 Mon Sep 17 00:00:00 2001 From: Ivan Tham Date: Sun, 8 Mar 2020 00:10:23 +0800 Subject: [PATCH 28/51] Sort return without duplicate condition --- src/you_get/util/log.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 67b26b78..81fd1bf5 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -99,6 +99,4 @@ def wtf(message, exit_code=1): def yes_or_no(message): ans = str(input('%s (y/N) ' % message)).lower().strip() - if ans == 'y': - return True - return False + return ans == 'y' From 7a43ac0782325d596e2372519faf033738d9a9ea Mon Sep 17 00:00:00 2001 From: helong0911 Date: Sat, 14 Mar 2020 13:33:34 +0800 Subject: [PATCH 29/51] [baomihua] fix download 403 --- src/you_get/extractors/baomihua.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/baomihua.py b/src/you_get/extractors/baomihua.py index 99dd7132..9e97879a 100644 --- a/src/you_get/extractors/baomihua.py +++ b/src/you_get/extractors/baomihua.py @@ -6,6 +6,16 @@ from ..common import * import urllib +def baomihua_headers(referer=None, cookie=None): + # a reasonable UA + ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36' + headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua} + if referer is not None: + headers.update({'Referer': referer}) + if cookie is not None: + headers.update({'Cookie': cookie}) + return headers + def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id) host = r1(r'host=([^&]*)', html) @@ -16,10 +26,10 @@ def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_onl assert vid dir_str = r1(r'&dir=([^&]*)', html).strip() url = "http://%s/%s/%s.%s" % (host, dir_str, vid, type) - _, ext, size = url_info(url) + _, ext, size = url_info(url, headers=baomihua_headers()) print_info(site_info, title, type, size) if not info_only: - download_urls([url], title, ext, size, output_dir, merge = merge) + download_urls([url], title, ext, size, output_dir, merge = merge, headers=baomihua_headers()) def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) From 8a47a729a9805032a94b7ce5171609ef3b5cb90d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 19 Mar 2020 11:46:44 +0100 Subject: [PATCH 30/51] [tests] remove test_missevan --- tests/test.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test.py b/tests/test.py index 7187cfb0..b5454fbf 100644 --- a/tests/test.py +++ b/tests/test.py @@ -21,13 +21,6 @@ class YouGetTests(unittest.TestCase): info_only=True ) - def test_missevan(self): - missevan.download('https://m.missevan.com/sound/1285995', info_only=True) - missevan.download_playlist( - 'https://www.missevan.com/mdrama/drama/24130', info_only=True) - missevan.download_playlist( - 'https://www.missevan.com/albuminfo/203090', info_only=True) - def test_youtube(self): youtube.download( 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True From a8d5819417ce0d4c7d7c4789043b2fa7e065b721 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 19 Mar 2020 11:57:36 +0100 Subject: [PATCH 31/51] update .travis.yml --- .travis.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8433fe75..8dd26bfa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,15 +4,10 @@ python: - "3.4" - "3.5" - "3.6" + - "3.7" + - "3.8" + - "nightly" - "pypy3" -matrix: - include: - - python: "3.7" - dist: xenial - - python: "3.8-dev" - dist: xenial - - python: "nightly" - dist: xenial before_install: - pip install flake8 before_script: From 50318b1e4d94da2e7034080cfa428feff3904df6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 19 Mar 2020 12:04:32 +0100 Subject: [PATCH 32/51] update .travis.yml (remove nightly) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8dd26bfa..eedbeeb2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ python: - "3.6" - "3.7" - "3.8" - - "nightly" + #- "nightly" (flake8 not working in python 3.9 yet, module 'ast' has no attribute 'AugLoad') - "pypy3" before_install: - pip install flake8 From cfa93fb16c2f0460caf62f6fce6fada683dad564 Mon Sep 17 00:00:00 2001 From: Yiyin Gu Date: Fri, 20 Mar 2020 19:16:41 -0400 Subject: [PATCH 33/51] temp fix of netease download Some vip download can download through this, others can't. Still looking into it. --- src/you_get/extractors/netease.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index f74747b1..c7c0f666 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -107,6 +107,9 @@ def netease_video_download(vinfo, output_dir='.', info_only=False): def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""): title = "%s%s. %s" % (playlist_prefix, song['position'], song['name']) + url_best = "http://music.163.com/song/media/outer/url?id=" + \ + str(song['id']) + ".mp3" + ''' songNet = 'p' + song['mp3Url'].split('/')[2][1:] if 'hMusic' in song and song['hMusic'] != None: @@ -115,7 +118,7 @@ def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix url_best = song['mp3Url'] elif 'bMusic' in song: url_best = make_url(songNet, song['bMusic']['dfsId']) - + ''' netease_download_common(title, url_best, output_dir=output_dir, info_only=info_only) From b4ea5976fb51233e0289196eb39fa73a6f3e1829 Mon Sep 17 00:00:00 2001 From: brainbush <960821@gmail.com> Date: Mon, 23 Mar 2020 15:52:15 +0800 Subject: [PATCH 34/51] add support for BVID of bilibili --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 444ccb6f..29ba0aff 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -159,7 +159,7 @@ class Bilibili(VideoExtractor): sort = 'live' elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url): sort = 'vc' - elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url): + elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url): sort = 'video' else: self.download_playlist_by_url(self.url, **kwargs) From 25c481cdcddf40b784c4b24fd8840d1574854845 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 Mar 2020 11:55:48 +0100 Subject: [PATCH 35/51] purge dead sites --- README.md | 5 --- src/you_get/extractors/musicplayon.py | 38 ----------------------- src/you_get/extractors/videomega.py | 44 --------------------------- src/you_get/extractors/vidto.py | 40 ------------------------ 4 files changed, 127 deletions(-) delete mode 100644 src/you_get/extractors/musicplayon.py delete mode 100644 src/you_get/extractors/videomega.py delete mode 100644 src/you_get/extractors/vidto.py diff --git a/README.md b/README.md index 0735bd8a..3105766b 100644 --- a/README.md +++ b/README.md @@ -368,15 +368,12 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | VK | |✓|✓| | | Vine | |✓| | | | Vimeo | |✓| | | -| Vidto | |✓| | | -| Videomega | |✓| | | | Veoh | |✓| | | | **Tumblr** | |✓|✓|✓| | TED | |✓| | | | SoundCloud | | | |✓| | SHOWROOM | |✓| | | | Pinterest | | |✓| | -| MusicPlayOn | |✓| | | | MTV81 | |✓| | | | Mixcloud | | | |✓| | Metacafe | |✓| | | @@ -387,7 +384,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | InfoQ | |✓| | | | Imgur | | |✓| | | Heavy Music Archive | | | |✓| -| **Google+** | |✓|✓| | | Freesound | | | |✓| | Flickr | |✓|✓| | | FC2 Video | |✓| | | @@ -409,7 +405,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | **bilibili
哔哩哔哩** | |✓| | | | 豆瓣 | |✓| |✓| | 斗鱼 | |✓| | | -| Panda
熊猫 | |✓| | | | 凤凰视频 | |✓| | | | 风行网 | |✓| | | | iQIYI
爱奇艺 | |✓| | | diff --git a/src/you_get/extractors/musicplayon.py b/src/you_get/extractors/musicplayon.py deleted file mode 100644 index ffc4ec36..00000000 --- a/src/you_get/extractors/musicplayon.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python - -from ..common import * -from ..extractor import VideoExtractor - -import json - -class MusicPlayOn(VideoExtractor): - name = "MusicPlayOn" - - stream_types = [ - {'id': '720p HD'}, - {'id': '360p SD'}, - ] - - def prepare(self, **kwargs): - content = get_content(self.url) - - self.title = match1(content, - r'setup\[\'title\'\] = "([^"]+)";') - - for s in self.stream_types: - quality = s['id'] - src = match1(content, - r'src: "([^"]+)", "data-res": "%s"' % quality) - if src is not None: - url = 'http://en.musicplayon.com%s' % src - self.streams[quality] = {'url': url} - - def extract(self, **kwargs): - for i in self.streams: - s = self.streams[i] - _, s['container'], s['size'] = url_info(s['url']) - s['src'] = [s['url']] - -site = MusicPlayOn() -download = site.download_by_url -# TBD: implement download_playlist diff --git a/src/you_get/extractors/videomega.py b/src/you_get/extractors/videomega.py deleted file mode 100644 index 34fb5205..00000000 --- a/src/you_get/extractors/videomega.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['videomega_download'] - -from ..common import * -import ssl - -def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - # Hot-plug cookie handler - ssl_context = request.HTTPSHandler( - context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) - cookie_handler = request.HTTPCookieProcessor() - opener = request.build_opener(ssl_context, cookie_handler) - opener.addheaders = [('Referer', url), - ('Cookie', 'noadvtday=0')] - request.install_opener(opener) - - if re.search(r'view\.php', url): - php_url = url - else: - content = get_content(url) - m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content) - ref = m.group(1) - width, height = m.group(2), m.group(3) - php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height) - content = get_content(php_url) - - title = match1(content, r'(.*)') - js = match1(content, r'(eval.*)') - t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)') - t = re.sub(r'(\w)', r'{\1}', t) - t = t.translate({87 + i: str(i) for i in range(10, 36)}) - s = match1(js, r"'([^']+)'\.split").split('|') - src = t.format(*s) - - type, ext, size = url_info(src, faker=True) - - print_info(site_info, title, type, size) - if not info_only: - download_urls([src], title, ext, size, output_dir, merge=merge, faker=True) - -site_info = "Videomega.tv" -download = videomega_download -download_playlist = playlist_not_supported('videomega') diff --git a/src/you_get/extractors/vidto.py b/src/you_get/extractors/vidto.py deleted file mode 100644 index c4e3b87e..00000000 --- a/src/you_get/extractors/vidto.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['vidto_download'] - -from ..common import * -import pdb -import time - - -def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - html = get_content(url) - params = {} - r = re.findall( - r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html) - for name, value in r: - params[name] = value - data = parse.urlencode(params).encode('utf-8') - req = request.Request(url) - print("Please wait for 6 seconds...") - time.sleep(6) - print("Starting") - new_html = request.urlopen(req, data).read().decode('utf-8', 'replace') - new_stff = re.search('lnk_download" href="(.*?)">', new_html) - if(new_stff): - url = new_stff.group(1) - title = params['fname'] - type = "" - ext = "" - a, b, size = url_info(url) - print_info(site_info, title, type, size) - if not info_only: - download_urls([url], title, ext, size, output_dir, merge=merge) - else: - print("cannot find link, please review") - pdb.set_trace() - - -site_info = "vidto.me" -download = vidto_download -download_playlist = playlist_not_supported('vidto') From e1edd9f912c147a4f04d780a39169b14a589208c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 Mar 2020 12:03:41 +0100 Subject: [PATCH 36/51] purge dead sites --- src/you_get/common.py | 3 --- src/you_get/extractors/__init__.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 70602c89..8c609d8c 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -83,7 +83,6 @@ SITES = { 'missevan' : 'missevan', 'mixcloud' : 'mixcloud', 'mtv81' : 'mtv81', - 'musicplayon' : 'musicplayon', 'miaopai' : 'yixia', 'naver' : 'naver', '7gogo' : 'nanagogo', @@ -107,8 +106,6 @@ SITES = { 'twimg' : 'twitter', 'twitter' : 'twitter', 'ucas' : 'ucas', - 'videomega' : 'videomega', - 'vidto' : 'vidto', 'vimeo' : 'vimeo', 'wanmen' : 'wanmen', 'weibo' : 'miaopai', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 5ed5264b..ce95904c 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -49,7 +49,6 @@ from .miaopai import * from .miomio import * from .mixcloud import * from .mtv81 import * -from .musicplayon import * from .nanagogo import * from .naver import * from .netease import * @@ -74,7 +73,6 @@ from .tumblr import * from .twitter import * from .ucas import * from .veoh import * -from .videomega import * from .vimeo import * from .vine import * from .vk import * From 88d574a4e9144f5a122460a2ea24c849fd04b88b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 Mar 2020 12:13:33 +0100 Subject: [PATCH 37/51] version 0.4.1423 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index c867e5b8..ab19c2be 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1410' +__version__ = '0.4.1423' From 25422ea3c5520bafc35614865637968634f93086 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 Mar 2020 12:23:40 +0100 Subject: [PATCH 38/51] update supported Python versions --- you-get.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/you-get.json b/you-get.json index 56f8212a..e98e2e8a 100644 --- a/you-get.json +++ b/you-get.json @@ -18,14 +18,13 @@ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.0", - "Programming Language :: Python :: 3.1", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Topic :: Internet", "Topic :: Internet :: WWW/HTTP", "Topic :: Multimedia", From f10a1cdade1398758aafa8137118d21e7b77179e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 23 Mar 2020 12:28:02 +0100 Subject: [PATCH 39/51] update LICENSE.txt --- LICENSE.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index 5964bf20..a193d8e2 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,7 @@ MIT License -Copyright (c) 2012-2019 Mort Yao +Copyright (c) 2012-2020 Mort Yao and other contributors + (https://github.com/soimort/you-get/graphs/contributors) Copyright (c) 2012 Boyu Guo Permission is hereby granted, free of charge, to any person obtaining a copy From d6afc2e829f152d3b6d88944d1ad1ce7fe30776b Mon Sep 17 00:00:00 2001 From: icpz Date: Tue, 24 Mar 2020 18:48:22 +0800 Subject: [PATCH 40/51] add support for BVID in playlist mode of bilibili --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 29ba0aff..045853f3 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -552,7 +552,7 @@ class Bilibili(VideoExtractor): elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \ re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url): sort = 'bangumi_md' - elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url): + elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|BV(\S+))', self.url): sort = 'video' elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url): sort = 'space_channel' From f12943aa00a5c59fe183eb9b59f6f1928d26a230 Mon Sep 17 00:00:00 2001 From: e <1160590998@qq.com> Date: Sat, 28 Mar 2020 18:15:22 +0800 Subject: [PATCH 41/51] add support for BVID in watchlater mode. --- src/you_get/extractors/bilibili.py | 6 +++--- tests/test.py | 10 +++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 045853f3..c669415d 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -132,10 +132,10 @@ class Bilibili(VideoExtractor): # r'

bangumi/play/ep diff --git a/tests/test.py b/tests/test.py index b5454fbf..6fd3db6c 100644 --- a/tests/test.py +++ b/tests/test.py @@ -7,7 +7,8 @@ from you_get.extractors import ( magisto, youtube, missevan, - acfun + acfun, + bilibili ) @@ -37,5 +38,12 @@ class YouGetTests(unittest.TestCase): def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) + def test_bilibil(self): + bilibili.download( + "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True + ) + bilibili.download( + "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True + ) if __name__ == '__main__': unittest.main() From c7b7a996ffa348833787dd77da70be288c65a9a5 Mon Sep 17 00:00:00 2001 From: zhufengning Date: Sun, 29 Mar 2020 18:09:29 +0800 Subject: [PATCH 42/51] fix bilibili favlist download and updated the api url --- src/you_get/extractors/bilibili.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index c669415d..2152661f 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -103,8 +103,8 @@ class Bilibili(VideoExtractor): return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps) @staticmethod - def bilibili_space_favlist_api(vmid, fid, pn=1, ps=100): - return 'https://api.bilibili.com/x/space/fav/arc?vmid=%s&fid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (vmid, fid, pn, ps) + def bilibili_space_favlist_api(fid, pn=1, ps=20): + return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps) @staticmethod def bilibili_space_video_api(mid, pn=1, ps=100): @@ -679,20 +679,22 @@ class Bilibili(VideoExtractor): elif sort == 'space_favlist': m = re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url) vmid, fid = m.group(1), m.group(2) - api_url = self.bilibili_space_favlist_api(vmid, fid) + api_url = self.bilibili_space_favlist_api(fid) api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) favlist_info = json.loads(api_content) - pc = favlist_info['data']['pagecount'] - - for pn in range(1, pc + 1): - api_url = self.bilibili_space_favlist_api(vmid, fid, pn=pn) + pc = favlist_info['data']['info']['media_count'] // len(favlist_info['data']['medias']) + if favlist_info['data']['info']['media_count'] % len(favlist_info['data']['medias']) != 0: + pc += 1 + for pn in range(1, pc): + log.w('Extracting %s of %s pages ...' % (pn, pc)) + api_url = self.bilibili_space_favlist_api(fid, pn=pn) api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) favlist_info = json.loads(api_content) - epn, i = len(favlist_info['data']['archives']), 0 - for video in favlist_info['data']['archives']: + epn, i = len(favlist_info['data']['medias']), 0 + for video in favlist_info['data']['medias']: i += 1; log.w('Extracting %s of %s videos ...' % (i, epn)) - url = 'https://www.bilibili.com/video/av%s' % video['aid'] + url = 'https://www.bilibili.com/video/av%s' % video['id'] self.__class__().download_playlist_by_url(url, **kwargs) elif sort == 'space_video': From d603266a421f6ee00f24a16cb29063403cee389a Mon Sep 17 00:00:00 2001 From: zhufengning Date: Sun, 29 Mar 2020 19:31:26 +0800 Subject: [PATCH 43/51] fix wrong range usage --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 2152661f..95ce707a 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -685,7 +685,7 @@ class Bilibili(VideoExtractor): pc = favlist_info['data']['info']['media_count'] // len(favlist_info['data']['medias']) if favlist_info['data']['info']['media_count'] % len(favlist_info['data']['medias']) != 0: pc += 1 - for pn in range(1, pc): + for pn in range(1, pc + 1): log.w('Extracting %s of %s pages ...' % (pn, pc)) api_url = self.bilibili_space_favlist_api(fid, pn=pn) api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) From 5c339cc68893fa67cdf2d09163e9c4ad1e85d060 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 29 Mar 2020 22:49:46 +0200 Subject: [PATCH 44/51] [baidu] support https --- src/you_get/extractors/baidu.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 77e666b3..521d5e99 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -112,15 +112,15 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= time.sleep(5) download_urls([real_url], title, ext, size, output_dir, url, merge=merge, faker=True) - elif re.match(r'http://music.baidu.com/album/\d+', url): - id = r1(r'http://music.baidu.com/album/(\d+)', url) + elif re.match(r'https?://music.baidu.com/album/\d+', url): + id = r1(r'https?://music.baidu.com/album/(\d+)', url) baidu_download_album(id, output_dir, merge, info_only) - elif re.match('http://music.baidu.com/song/\d+', url): - id = r1(r'http://music.baidu.com/song/(\d+)', url) + elif re.match('https?://music.baidu.com/song/\d+', url): + id = r1(r'https?://music.baidu.com/song/(\d+)', url) baidu_download_song(id, output_dir, merge, info_only) - elif re.match('http://tieba.baidu.com/', url): + elif re.match('https?://tieba.baidu.com/', url): try: # embedded videos embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) From b347b1bb06c2f2aee71ddb1d770d7c1294919cee Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 30 Mar 2020 02:40:05 +0200 Subject: [PATCH 45/51] [bilibili] support h --- src/you_get/extractors/bilibili.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 95ce707a..f53af468 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -28,6 +28,8 @@ class Bilibili(VideoExtractor): 'container': 'FLV', 'video_resolution': '360p', 'desc': '流畅 360P'}, # 'quality': 15? {'id': 'mp4', 'quality': 0}, + + {'id': 'jpg', 'quality': 0}, ] @staticmethod @@ -114,6 +116,10 @@ class Bilibili(VideoExtractor): def bilibili_vc_api(video_id): return 'https://api.vc.bilibili.com/clip/v1/video/detail?video_id=%s' % video_id + @staticmethod + def bilibili_h_api(doc_id): + return 'https://api.vc.bilibili.com/link_draw/v1/doc/detail?doc_id=%s' % doc_id + @staticmethod def url_size(url, faker=False, headers={},err_value=0): try: @@ -161,6 +167,8 @@ class Bilibili(VideoExtractor): sort = 'vc' elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url): sort = 'video' + elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url): + sort = 'h' else: self.download_playlist_by_url(self.url, **kwargs) return @@ -426,6 +434,24 @@ class Bilibili(VideoExtractor): self.streams['mp4'] = {'container': container, 'size': size, 'src': [playurl]} + # h images + elif sort == 'h': + m = re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url) + doc_id = m.group(1) + api_url = self.bilibili_h_api(doc_id) + api_content = get_content(api_url, headers=self.bilibili_headers()) + h_info = json.loads(api_content) + + urls = [] + for pic in h_info['data']['item']['pictures']: + img_src = pic['img_src'] + urls.append(img_src) + size = urls_size(urls) + + self.title = doc_id + container = 'jpg' # enforce JPG container + self.streams[container] = {'container': container, + 'size': size, 'src': urls} def prepare_by_cid(self,avid,cid,title,html_content,playinfo,playinfo_,url): #response for interaction video From 9858e2f25daca32f9205d5be9e3371e387976e2d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 30 Mar 2020 02:43:59 +0200 Subject: [PATCH 46/51] version 0.4.1432 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index ab19c2be..d5004187 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1423' +__version__ = '0.4.1432' From bd06317fcc947d2705adaae4ee5e2a21acececc4 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 30 Mar 2020 18:16:58 +0200 Subject: [PATCH 47/51] [README] [bilibili] images and audios supported --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3105766b..8ec210b7 100644 --- a/README.md +++ b/README.md @@ -402,7 +402,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | **AcFun** | |✓| | | | **Baidu
百度贴吧** | |✓|✓| | | 爆米花网 | |✓| | | -| **bilibili
哔哩哔哩** | |✓| | | +| **bilibili
哔哩哔哩** | |✓|✓|✓| | 豆瓣 | |✓| |✓| | 斗鱼 | |✓| | | | 凤凰视频 | |✓| | | From 84a5611939443dbf03e9751a0b33598934647652 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 7 Apr 2020 23:45:32 -0400 Subject: [PATCH 48/51] inital --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 3 +- src/you_get/extractors/xinpianchang.py | 46 ++++++++++++++++++++++++++ tests/test.py | 7 +++- 5 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 src/you_get/extractors/xinpianchang.py diff --git a/README.md b/README.md index 8ec210b7..3429f9d8 100644 --- a/README.md +++ b/README.md @@ -436,6 +436,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 火猫TV | |✓| | | | 阳光宽频网 | |✓| | | | 西瓜视频 | |✓| | | +| 新片场 | |✓| | | | 快手 | |✓|✓| | | 抖音 | |✓| | | | TikTok | |✓| | | diff --git a/src/you_get/common.py b/src/you_get/common.py index 8c609d8c..2e4edef5 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -116,6 +116,7 @@ SITES = { 'xiaokaxiu' : 'yixia', 'xiaojiadianvideo' : 'fc2video', 'ximalaya' : 'ximalaya', + 'xinpianchang' : 'xinpianchang', 'yinyuetai' : 'yinyuetai', 'yizhibo' : 'yizhibo', 'youku' : 'youku', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index ce95904c..4280d236 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -79,10 +79,11 @@ from .vk import * from .w56 import * from .wanmen import * from .xiami import * +from .xinpianchang import * from .yinyuetai import * from .yixia import * from .youku import * from .youtube import * from .zhanqi import * from .zhibo import * -from .zhihu import * +from .zhihu import * \ No newline at end of file diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py new file mode 100644 index 00000000..a15b193a --- /dev/null +++ b/src/you_get/extractors/xinpianchang.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +import re +import json +from ..extractor import VideoExtractor +from ..common import get_content, playlist_not_supported + + +class Xinpianchang(VideoExtractor): + stream_types = [ + {'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'}, + {'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'}, + {'id': '1080', 'quality': '高清 1080P', 'video_profile': 'mp4-FHD'}, + {'id': '720', 'quality': '高清 720P', 'video_profile': 'mp4-HD'}, + {'id': '540', 'quality': '清晰 540P', 'video_profile': 'mp4-SD'}, + {'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'} + ] + + name = 'xinpianchang' + + def prepare(self, **kwargs): + # find key + page_content = get_content(self.url) + match_rule = r"vid: \"(.+?)\"," + key = re.findall(match_rule, page_content)[0] + + # get videos info + video_url = 'https://openapi-vtom.vmovier.com/v3/video/' + key + '?expand=resource' + data = json.loads(get_content(video_url)) + self.title = data["data"]["video"]["title"] + video_info = data["data"]["resource"]["progressive"] + + # set streams dict + for video in video_info: + url = video["https_url"] + size = video["filesize"] + profile = video["profile_code"] + stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0] + + stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality']) + print(stream_data) + self.streams[stype['id']] = stream_data + + +download = Xinpianchang().download_by_url +download_playlist = playlist_not_supported('xinpianchang') diff --git a/tests/test.py b/tests/test.py index 6fd3db6c..5bc0a2e5 100644 --- a/tests/test.py +++ b/tests/test.py @@ -8,7 +8,8 @@ from you_get.extractors import ( youtube, missevan, acfun, - bilibili + bilibili, + xinpianchang ) @@ -45,5 +46,9 @@ class YouGetTests(unittest.TestCase): bilibili.download( "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True ) + + def test_xinpianchang(self): + imgur.download('https://www.xinpianchang.com/a10673220', info_only=True) + if __name__ == '__main__': unittest.main() From b771248d23a73c7dc18e2b1ea5bd13247342e456 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 7 Apr 2020 23:54:43 -0400 Subject: [PATCH 49/51] fix --- src/you_get/extractors/xinpianchang.py | 1 - tests/test.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py index a15b193a..48830d97 100644 --- a/src/you_get/extractors/xinpianchang.py +++ b/src/you_get/extractors/xinpianchang.py @@ -38,7 +38,6 @@ class Xinpianchang(VideoExtractor): stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0] stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality']) - print(stream_data) self.streams[stype['id']] = stream_data diff --git a/tests/test.py b/tests/test.py index 5bc0a2e5..5e4de738 100644 --- a/tests/test.py +++ b/tests/test.py @@ -48,7 +48,7 @@ class YouGetTests(unittest.TestCase): ) def test_xinpianchang(self): - imgur.download('https://www.xinpianchang.com/a10673220', info_only=True) + xinpianchang.download('https://www.xinpianchang.com/a10673220', info_only=True) if __name__ == '__main__': unittest.main() From 4e0ca6f3e4a02d851a51e56ebcff472891ad6a56 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 7 Apr 2020 23:55:50 -0400 Subject: [PATCH 50/51] rm test --- tests/test.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/test.py b/tests/test.py index 5e4de738..6fd3db6c 100644 --- a/tests/test.py +++ b/tests/test.py @@ -8,8 +8,7 @@ from you_get.extractors import ( youtube, missevan, acfun, - bilibili, - xinpianchang + bilibili ) @@ -46,9 +45,5 @@ class YouGetTests(unittest.TestCase): bilibili.download( "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True ) - - def test_xinpianchang(self): - xinpianchang.download('https://www.xinpianchang.com/a10673220', info_only=True) - if __name__ == '__main__': unittest.main() From 018cfde6048707a8a642493a3dc0e934de2f267e Mon Sep 17 00:00:00 2001 From: Richard Xue Date: Wed, 8 Apr 2020 00:08:44 -0400 Subject: [PATCH 51/51] Update xinpianchang.py --- src/you_get/extractors/xinpianchang.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py index 48830d97..fac3d01f 100644 --- a/src/you_get/extractors/xinpianchang.py +++ b/src/you_get/extractors/xinpianchang.py @@ -7,6 +7,7 @@ from ..common import get_content, playlist_not_supported class Xinpianchang(VideoExtractor): + name = 'xinpianchang' stream_types = [ {'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'}, {'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'}, @@ -16,8 +17,6 @@ class Xinpianchang(VideoExtractor): {'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'} ] - name = 'xinpianchang' - def prepare(self, **kwargs): # find key page_content = get_content(self.url)