From 6fa81497d539a99fda2636126df3e485179a4edd Mon Sep 17 00:00:00 2001 From: MaxwellGoblin Date: Thu, 20 Jul 2017 17:14:05 +0800 Subject: [PATCH 001/271] [youtube]use mp4_audio track when no audio track for webm --- src/you_get/extractors/youtube.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 986906d6..3b412dc2 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -366,14 +366,22 @@ class YouTube(VideoExtractor): dash_url += '&signature={}'.format(sig) dash_size = stream['clen'] itag = stream['itag'] + audio_url = None + audio_size = None + try: + audio_url = dash_webm_a_url + audio_size = int(dash_webm_a_size) + except UnboundLocalError as e: + audio_url = dash_mp4_a_url + audio_size = int(dash_mp4_a_size) self.dash_streams[itag] = { 'quality': stream['size'], 'itag': itag, 'type': mimeType, 'mime': mimeType, 'container': 'webm', - 'src': [dash_url, dash_webm_a_url], - 'size': int(dash_size) + int(dash_webm_a_size) + 'src': [dash_url, audio_url], + 'size': int(dash_size) + int(audio_size) } def extract(self, **kwargs): From a1290be2ec14943c6a77cbb4d80c561083cee44e Mon Sep 17 00:00:00 2001 From: hellsof Date: Wed, 3 Jan 2018 11:43:26 +0800 Subject: [PATCH 002/271] fix https://v.qq.com/x/page/q0527wsyqpv.html --- src/you_get/extractors/qq.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 438c1f24..199df921 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -149,6 +149,8 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): vid = "" if rurl: vid = rurl.split('/')[-1].split('.')[0] + if vid == "undefined": + vid = "" vid = vid if vid else url.split('/')[-1].split('.')[0] #https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html? vid = vid if vid else match1(content, r'vid"*\s*:\s*"\s*([^"]+)"') #general fallback if not vid: From fb2c9368933d4ba59a622c65765c2368fd12c19d Mon Sep 17 00:00:00 2001 From: Justsoos Date: Mon, 19 Feb 2018 22:00:55 +0800 Subject: [PATCH 003/271] Add longzhu.com --- src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/longzhu.py | 73 ++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 src/you_get/extractors/longzhu.py diff --git a/src/you_get/common.py b/src/you_get/common.py index a4a036a4..4ea144f1 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -74,6 +74,7 @@ SITES = { 'le' : 'le', 'letv' : 'le', 'lizhi' : 'lizhi', + 'longzhu' : 'longzhu', 'magisto' : 'magisto', 'metacafe' : 'metacafe', 'mgtv' : 'mgtv', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 46e5c89c..ec9e86ae 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -41,6 +41,7 @@ from .kugou import * from .kuwo import * from .le import * from .lizhi import * +from .longzhu import * from .magisto import * from .metacafe import * from .mgtv import * diff --git a/src/you_get/extractors/longzhu.py b/src/you_get/extractors/longzhu.py new file mode 100644 index 00000000..ed0cb084 --- /dev/null +++ b/src/you_get/extractors/longzhu.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +__all__ = ['longzhu_download'] + +import json +from ..common import ( + get_content, + match1, + print_info, + download_urls, + playlist_not_supported, +) +from ..common import player + +def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs): + web_domain = url.split('/')[2] + if (web_domain == 'star.longzhu.com') or (web_domain == 'y.longzhu.com'): + domain = url.split('/')[3].split('?')[0] + m_url = 'http://m.longzhu.com/{0}'.format(domain) + m_html = get_content(m_url) + room_id_patt = r'var\s*roomId\s*=\s*(\d+);' + room_id = match1(m_html,room_id_patt) + + json_url = 'http://liveapi.plu.cn/liveapp/roomstatus?roomId={0}'.format(room_id) + content = get_content(json_url) + data = json.loads(content) + streamUri = data['streamUri'] + if len(streamUri) <= 4: + raise ValueError('The live stream is not online!') + title = data['title'] + streamer = data['userName'] + title = str.format(streamer,': ',title) + + steam_api_url = 'http://livestream.plu.cn/live/getlivePlayurl?roomId={0}'.format(room_id) + content = get_content(steam_api_url) + data = json.loads(content) + isonline = data.get('isTransfer') + if isonline == '0': + raise ValueError('The live stream is not online!') + + real_url = data['playLines'][0]['urls'][0]['securityUrl'] + + print_info(site_info, title, 'flv', float('inf')) + + if not info_only: + download_urls([real_url], title, 'flv', None, output_dir, merge=merge) + + elif web_domain == 'replay.longzhu.com': + videoid = match1(url, r'(\d+)$') + json_url = 'http://liveapi.longzhu.com/livereplay/getreplayfordisplay?videoId={0}'.format(videoid) + content = get_content(json_url) + data = json.loads(content) + + username = data['userName'] + title = data['title'] + title = str.format(username,':',title) + real_url = data['videoUrl'] + + if player: + print_info('Longzhu Video', title, 'm3u8', 0) + download_urls([real_url], title, 'm3u8', 0, output_dir, merge=merge) + else: + urls = general_m3u8_extractor(real_url) + print_info('Longzhu Video', title, 'm3u8', 0) + if not info_only: + download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) + + else: + raise ValueError('Wrong url or unsupported link ... {0}'.format(url)) + +site_info = 'longzhu.com' +download = longzhu_download +download_playlist = playlist_not_supported('longzhu') \ No newline at end of file From a22dce896cd13ca398f748b93c73ea172b837df1 Mon Sep 17 00:00:00 2001 From: John128217 <35967907+John128217@users.noreply.github.com> Date: Wed, 21 Feb 2018 14:23:21 -0800 Subject: [PATCH 004/271] Added an auto rename option and fixed the force option I've noticed that if I am downloading two videos with same names from youtube (e.g. https://www.youtube.com/watch?v=606hmlA_nxw and https://www.youtube.com/watch?v=CLrXTnggUeY), only one of them will be saved (usually the bigger one according to the original script "os.path.getsize(output_filepath) >= total_size * 0.9"). However, I want them both while preserving their names from youtube. So I modified the source code. It looks like there are a lot of changes, but I just added an indent and everything changed. Also, I've noticed that "force" is not working at all. I fixed that issue. --- src/you_get/common.py | 63 +++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index a4a036a4..76cf5b97 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -134,6 +134,7 @@ player = None extractor_proxy = None cookies = None output_filename = None +auto_rename = False fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa @@ -598,27 +599,40 @@ def url_save( tmp_headers['Referer'] = refer file_size = url_size(url, faker=faker, headers=tmp_headers) - if os.path.exists(filepath): - if not force and file_size == os.path.getsize(filepath): - if not is_part: - if bar: - bar.done() - print( - 'Skipping {}: file already exists'.format( - tr(os.path.basename(filepath)) + continue_renameing = True + while continue_renameing: + continue_renameing = False + if os.path.exists(filepath): + if not force and file_size == os.path.getsize(filepath): + if not is_part: + if bar: + bar.done() + print( + 'Skipping {}: file already exists'.format( + tr(os.path.basename(filepath)) + ) ) - ) + else: + if bar: + bar.update_received(file_size) + return else: - if bar: - bar.update_received(file_size) - return - else: - if not is_part: - if bar: - bar.done() - print('Overwriting %s' % tr(os.path.basename(filepath)), '...') - elif not os.path.exists(os.path.dirname(filepath)): - os.mkdir(os.path.dirname(filepath)) + if not is_part: + if bar: + bar.done() + if not force and auto_rename: + path, ext = os.path.basename(filepath).rsplit('.', 1) + if (re.compile(' \(\d\)').match(path[-4:]) is None): + thisfile = path + ' (1).' + ext + else: + thisfile = path[:-2] + str(int(path[-2]) + 1) + ').' + ext + filepath = os.path.join(os.path.dirname(filepath), thisfile) + print('Changing name to %s' % tr(os.path.basename(filepath)), '...') + continue_renameing = True + continue + print('Overwriting %s' % tr(os.path.basename(filepath)), '...') + elif not os.path.exists(os.path.dirname(filepath)): + os.mkdir(os.path.dirname(filepath)) temp_filepath = filepath + '.download' if file_size != float('inf') \ else filepath @@ -883,7 +897,7 @@ def download_urls( output_filepath = os.path.join(output_dir, output_filename) if total_size: - if not force and os.path.exists(output_filepath) \ + if not force and os.path.exists(output_filepath) and not auto_rename\ and os.path.getsize(output_filepath) >= total_size * 0.9: print('Skipping %s: file already exists' % output_filepath) print() @@ -1370,6 +1384,10 @@ def script_main(download, download_playlist, **kwargs): '-l', '--playlist', action='store_true', help='Prefer to download a playlist' ) + download_grp.add_argument( + '-a', '--auto-rename', action='store_true', default=False, + help='Auto rename same name different files' + ) proxy_grp = parser.add_argument_group('Proxy options') proxy_grp = proxy_grp.add_mutually_exclusive_group() @@ -1414,11 +1432,16 @@ def script_main(download, download_playlist, **kwargs): global player global extractor_proxy global output_filename + global auto_rename output_filename = args.output_filename extractor_proxy = args.extractor_proxy info_only = args.info + if args.force: + force = True + if args.auto_rename: + auto_rename = True if args.url: dry_run = True if args.json: From c7290c7c2fa5bbf9c4623cf5694b742212d74df7 Mon Sep 17 00:00:00 2001 From: John128217 <35967907+John128217@users.noreply.github.com> Date: Wed, 21 Feb 2018 21:55:28 -0800 Subject: [PATCH 005/271] A number bug fixed Now if you can have more than 10 videos that have same names. --- src/you_get/common.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 76cf5b97..0d1b1810 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -622,10 +622,13 @@ def url_save( bar.done() if not force and auto_rename: path, ext = os.path.basename(filepath).rsplit('.', 1) - if (re.compile(' \(\d\)').match(path[-4:]) is None): + finder = re.compile(' \([1-9]\d*?\)$') + if (finder.search(path) is None): thisfile = path + ' (1).' + ext else: - thisfile = path[:-2] + str(int(path[-2]) + 1) + ').' + ext + def numreturn(a): + return ' (' + str(int(a.group()[2:-1]) + 1) + ').' + thisfile = finder.sub(numreturn, path) + ext filepath = os.path.join(os.path.dirname(filepath), thisfile) print('Changing name to %s' % tr(os.path.basename(filepath)), '...') continue_renameing = True From b4850f5a5907a263d36d7d78e231f86e2321bd4c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Feb 2018 17:21:43 +0100 Subject: [PATCH 006/271] [common] indent! --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 4ea144f1..b4d57841 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -74,7 +74,7 @@ SITES = { 'le' : 'le', 'letv' : 'le', 'lizhi' : 'lizhi', - 'longzhu' : 'longzhu', + 'longzhu' : 'longzhu', 'magisto' : 'magisto', 'metacafe' : 'metacafe', 'mgtv' : 'mgtv', From 0b50fdfab430cff3b1e02c17def260ae0a5b47a3 Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Wed, 28 Feb 2018 16:45:48 +0800 Subject: [PATCH 007/271] [ixigua] fix URL request error and video download error and video encryption parameters acquisition --- src/you_get/extractors/ixigua.py | 40 ++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index aaed195d..0c668e82 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -5,30 +5,35 @@ import random import binascii from ..common import * -def get_video_id(text): - re_id = r"videoId: '(.*?)'" - return re.findall(re_id, text)[0] +headers = { + 'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36' + ' (KHTML, like Gecko) Chrome/61.0.3163.100 Mobile Safari/537.36' +} + def get_r(): return str(random.random())[2:] + def right_shift(val, n): return val >> n if val >= 0 else (val + 0x100000000) >> n + def get_s(text): """get video info""" - id = get_video_id(text) + js_data = json.loads(text) + id = js_data['data']['video_id'] p = get_r() url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % id n = parse.urlparse(url).path + '?r=%s' % p c = binascii.crc32(n.encode('utf-8')) s = right_shift(c, 0) - title = ''.join(re.findall(r"title: '(.*?)',", text)) - return url + '?r=%s&s=%s' % (p, s), title + return url + '?r=%s&s=%s' % (p, s), js_data['data']['title'] + def get_moment(url, user_id, base_url, video_list): """Recursively obtaining a video list""" - video_list_data = json.loads(get_content(url)) + video_list_data = json.loads(get_content(url, headers=headers)) if not video_list_data['next']['max_behot_time']: return video_list [video_list.append(i["display_url"]) for i in video_list_data["data"]] @@ -41,23 +46,33 @@ def get_moment(url, user_id, base_url, video_list): } return get_moment(**_param) + def ixigua_download(url, output_dir='.', info_only=False, **kwargs): """ Download a single video Sample URL: https://www.ixigua.com/a6487187567887254029/#mid=59051127876 """ try: - video_info_url, title = get_s(get_content(url)) - video_info = json.loads(get_content(video_info_url)) + video_page_id = re.findall('(\d+)', [i for i in url.split('/') if i][3])[0] if 'toutiao.com' in url \ + else re.findall('(\d+)', [i for i in url.split('/') if i][2])[0] + + video_start_info_url = r'https://m.ixigua.com/i{}/info/'.format(video_page_id) + video_info_url, title = get_s(get_content(video_start_info_url, headers=headers or kwargs.get('headers', {}))) + video_info = json.loads(get_content(video_info_url, headers=headers or kwargs.get('headers', {}))) except Exception: raise NotImplementedError(url) try: video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"]).decode() except Exception: raise NotImplementedError(url) - filetype, ext, size = url_info(video_url) + filetype, ext, size = url_info(video_url, headers=headers or kwargs.get('headers', {})) print_info(site_info, title, filetype, size) if not info_only: - download_urls([video_url], title, ext, size, output_dir=output_dir) + _param = { + 'output_dir': output_dir, + 'headers': headers or kwargs.get('headers', {}) + } + download_urls([video_url], title, ext, size, **_param) + def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): """Download all video from the user's video list @@ -80,6 +95,7 @@ def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): for i in get_moment(**_param): ixigua_download(i, output_dir, info_only, **kwargs) + site_info = "ixigua.com" download = ixigua_download -download_playlist = ixigua_download_playlist \ No newline at end of file +download_playlist = ixigua_download_playlist From 7633898850f6ed30c78e1fb5bdb0f96b81d9d87a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 1 Mar 2018 22:55:09 +0100 Subject: [PATCH 008/271] version 0.4.1040 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 2d4ff9d0..7e220d0d 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1025' +__version__ = '0.4.1040' From 1900f7608cc2756d5460c99eb792c8e0eb42e7f4 Mon Sep 17 00:00:00 2001 From: mq-liu Date: Wed, 7 Mar 2018 09:48:11 +0800 Subject: [PATCH 009/271] fix bilibili download fail the bilibili api has changed "https://interface.bilibili.com/v2/playurl?cid=33250486&appkey=84956560bc028eb7&otype=json&type=&quality=0&qn=0&sign=a1b0401c8bf70d676bab133fa032469f" --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index e5abccab..046d2cb1 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -23,7 +23,7 @@ from .youku import youku_download_by_vid class Bilibili(VideoExtractor): name = 'Bilibili' live_api = 'http://live.bilibili.com/api/playurl?cid={}&otype=json' - api_url = 'http://interface.bilibili.com/playurl?' + api_url = 'http://interface.bilibili.com/v2/playurl?' bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?' live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}' live_room_info_api_url = 'https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}' From 92eb72bc7d20370e2835ed78dad94c0accaa068a Mon Sep 17 00:00:00 2001 From: Kugel-Blitz <21170940+Kugel-Blitz@users.noreply.github.com> Date: Sun, 11 Mar 2018 10:06:19 +1300 Subject: [PATCH 010/271] Use 0513 when cookies are used 0507 doesn't seem to honour cookies when they're loaded. --- src/you_get/extractors/youku.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 9d74b9c8..d40d16c1 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,10 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0513' + if cookies: + self.ccode = '0513' + else: + self.ccode = '0507' self.utid = None def youku_ups(self): From 344502af0d223def2a9ed0fde3766f6f7490b23b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 15 Mar 2018 22:38:23 +0100 Subject: [PATCH 011/271] [youku] resolve conflict --- src/you_get/extractors/youku.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 099552cf..d40d16c1 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,10 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0507' + if cookies: + self.ccode = '0513' + else: + self.ccode = '0507' self.utid = None def youku_ups(self): From fdf53508388135917bb976319b2be01b96034634 Mon Sep 17 00:00:00 2001 From: Phun Date: Mon, 19 Mar 2018 12:48:38 +0800 Subject: [PATCH 012/271] fix the bug of v.qq.com --- src/you_get/extractors/qq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 89dd7b61..5591e3eb 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -122,9 +122,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): return #do redirect - if 'v.qq.com/page' in url: + if 'v.qq.com/x' in url: # for URLs like this: - # http://v.qq.com/page/k/9/7/k0194pwgw97.html + # https://v.qq.com/x/page/r05533mns3s.html new_url = url_locations([url])[0] if url == new_url: #redirect in js? From 3faaebb6762ff1bbea1e9b45b6dd348a92ddbcfc Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Mar 2018 22:40:07 +0100 Subject: [PATCH 013/271] [qq] no more redirect (close #2586) --- src/you_get/extractors/qq.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 5591e3eb..ffca5a85 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -121,18 +121,6 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): qq_download_by_vid(vid, vid, output_dir, merge, info_only) return - #do redirect - if 'v.qq.com/x' in url: - # for URLs like this: - # https://v.qq.com/x/page/r05533mns3s.html - new_url = url_locations([url])[0] - if url == new_url: - #redirect in js? - content = get_content(url) - url = match1(content,r'window\.location\.href="(.*?)"') - else: - url = new_url - if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): content = get_content(url) vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') From 8979cd63eaa0979f249e4132a10b3706c4952b02 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Mar 2018 22:44:33 +0100 Subject: [PATCH 014/271] [qq] break if no pay --- src/you_get/extractors/qq.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index ffca5a85..7b1a6860 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -47,6 +47,9 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): else: log.w(key_json['msg']) break + if key_json.get('filename') is None: + log.w(key_json['msg']) + break part_urls.append(url) _, ext, size = url_info(url) From 821e639e025296b4c041d0535ca4d95ad72ea397 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Mar 2018 22:46:12 +0100 Subject: [PATCH 015/271] [youku] boom boom boom --- src/you_get/extractors/youku.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index d40d16c1..bc4d8088 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,10 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - if cookies: - self.ccode = '0513' - else: - self.ccode = '0507' + self.ccode = '0590' self.utid = None def youku_ups(self): From d3719ed4b62be2697e18755bcda7cb2249c8d7c1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 24 Mar 2018 23:56:38 +0100 Subject: [PATCH 016/271] [bilibili] warn when target URL is a playlist --- src/you_get/extractors/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 046d2cb1..7e5bdb37 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -167,8 +167,8 @@ class Bilibili(VideoExtractor): qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) return - has_plist = re.search(r' Date: Mon, 26 Mar 2018 12:05:12 +0200 Subject: [PATCH 017/271] from ..common import general_m3u8_extractor Import the definition of __general_m3u8_extractor()__ for the function call on line 64... flake8 testing of https://github.com/soimort/you-get on Python 3.6.3 $ __flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics__ ``` ./src/you_get/extractors/longzhu.py:63:20: F821 undefined name 'general_m3u8_extractor' urls = general_m3u8_extractor(real_url) ^ 1 F821 undefined name 'general_m3u8_extractor' ``` --- src/you_get/extractors/longzhu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/longzhu.py b/src/you_get/extractors/longzhu.py index ed0cb084..29b340c5 100644 --- a/src/you_get/extractors/longzhu.py +++ b/src/you_get/extractors/longzhu.py @@ -5,6 +5,7 @@ __all__ = ['longzhu_download'] import json from ..common import ( get_content, + general_m3u8_extractor, match1, print_info, download_urls, @@ -70,4 +71,4 @@ def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwarg site_info = 'longzhu.com' download = longzhu_download -download_playlist = playlist_not_supported('longzhu') \ No newline at end of file +download_playlist = playlist_not_supported('longzhu') From 43923bc8f6c7df552e672a4e80aed0e58010964d Mon Sep 17 00:00:00 2001 From: JayXon Date: Thu, 29 Mar 2018 00:59:28 -0700 Subject: [PATCH 018/271] [youku] use default ckey 1080p works --- src/you_get/extractors/youku.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index bc4d8088..fc1a5cd2 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,10 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0590' + self.ccode = '0502' + # Found in http://g.alicdn.com/player/ykplayer/0.5.28/youku-player.min.js + # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js + self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' self.utid = None def youku_ups(self): @@ -86,6 +89,7 @@ class Youku(VideoExtractor): url += '&client_ip=192.168.1.1' url += '&utid=' + self.utid url += '&client_ts=' + str(int(time.time())) + url += '&ckey=' + urllib.parse.quote(self.ckey) if self.password_protected: url += '&password=' + self.password headers = dict(Referer=self.referer) From 6a9039aab110f40ba6a4fed5915d58cffee8aa46 Mon Sep 17 00:00:00 2001 From: hellsof Date: Wed, 11 Apr 2018 19:59:14 +0800 Subject: [PATCH 019/271] fix https://v.qq.com/x/page/d0552xbadkl.html https://y.qq.com/n/yqq/mv/v/g00268vlkzy.html --- src/you_get/extractors/qq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 6a859b8b..915f1b4b 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -140,7 +140,8 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): vid = "" if rurl: vid = rurl.split('/')[-1].split('.')[0] - if vid == "undefined": + # https://v.qq.com/x/page/d0552xbadkl.html https://y.qq.com/n/yqq/mv/v/g00268vlkzy.html + if vid == "undefined" or vid == "index": vid = "" vid = vid if vid else url.split('/')[-1].split('.')[0] #https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html? vid = vid if vid else match1(content, r'vid"*\s*:\s*"\s*([^"]+)"') #general fallback From ead0979ca133e75b62835c3d3ac2783955534a0f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 12 Apr 2018 03:19:12 +0200 Subject: [PATCH 020/271] [universal] relative path lah --- src/you_get/extractors/universal.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 633cf55b..b6bb68b1 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -67,6 +67,14 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg urls += re.findall(r'href="(https?://[^"]+\.png)"', page, re.I) urls += re.findall(r'href="(https?://[^"]+\.gif)"', page, re.I) + # relative path + rel_urls = [] + rel_urls += re.findall(r'href="(\.[^"]+\.jpe?g)"', page, re.I) + rel_urls += re.findall(r'href="(\.[^"]+\.png)"', page, re.I) + rel_urls += re.findall(r'href="(\.[^"]+\.gif)"', page, re.I) + for rel_url in rel_urls: + urls += [ r1(r'(.*/)', url) + rel_url ] + # MPEG-DASH MPD mpd_urls = re.findall(r'src="(https?://[^"]+\.mpd)"', page) for mpd_url in mpd_urls: From 25aa2ac2e5f5b408edfc53f64a6706a716f0e0c3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Apr 2018 16:34:15 +0200 Subject: [PATCH 021/271] [universal] better extraction of title and ext --- src/you_get/extractors/universal.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index b6bb68b1..e343d4cd 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -112,10 +112,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg else: # direct download - filename = parse.unquote(url.split('/')[-1]) - title = '.'.join(filename.split('.')[:-1]) - ext = filename.split('.')[-1] - _, _, size = url_info(url, faker=True) + filename = parse.unquote(url.split('/')[-1]) or parse.unquote(url.split('/')[-2]) + title = '.'.join(filename.split('.')[:-1]) or filename + _, ext, size = url_info(url, faker=True) print_info(site_info, title, ext, size) if not info_only: download_urls([url], title, ext, size, From c77f29861c27725811c54285f351fc120279d75c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Apr 2018 17:07:46 +0200 Subject: [PATCH 022/271] [universal] support Open Graph og:video:url --- src/you_get/extractors/universal.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index e343d4cd..573d8eea 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -31,6 +31,19 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg if page_title: page_title = unescape_html(page_title) + meta_videos = re.findall(r' Date: Sun, 15 Apr 2018 17:13:08 +0200 Subject: [PATCH 023/271] version 0.4.1060 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 7e220d0d..4d91c55d 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1040' +__version__ = '0.4.1060' From b28d78f71d50369cb6d306ef3e68430dedf86f1a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 19 Apr 2018 03:22:18 +0200 Subject: [PATCH 024/271] [twitter] support twitter moments --- src/you_get/extractors/twitter.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 9c5acb31..894439aa 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -18,6 +18,17 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) if re.match(r'https?://mobile', url): # normalize mobile URL url = 'https://' + match1(url, r'//mobile\.(.+)') + if re.match(r'https?://twitter\.com/i/moments/', url): # moments + html = get_html(url) + paths = re.findall(r'data-permalink-path="([^"]+)"', html) + for path in paths: + twitter_download('https://twitter.com' + path, + output_dir=output_dir, + merge=merge, + info_only=info_only, + **kwargs) + return + html = get_html(url) screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ r1(r' Date: Sun, 22 Apr 2018 10:38:40 +0800 Subject: [PATCH 025/271] fix bilibili update xml-url to get all format of video --- src/you_get/extractors/bilibili.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 7e5bdb37..d23bbe5c 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -28,7 +28,8 @@ class Bilibili(VideoExtractor): live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}' live_room_info_api_url = 'https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}' - SEC1 = '1c15888dc316e05a15fdd0a02ed6584f' + #SEC1 = '1c15888dc316e05a15fdd0a02ed6584f' + SEC1 = '94aba54af9065f71de72f5508f1cd42e' SEC2 = '9b288147e5474dd2aa67085f716c560d' stream_types = [ {'id': 'hdflv'}, @@ -44,7 +45,7 @@ class Bilibili(VideoExtractor): @staticmethod def bilibili_stream_type(urls): url = urls[0] - if 'hd.flv' in url or '-112.flv' in url: + if 'hd.flv' in url or '-80.flv' in url: return 'hdflv', 'flv' if '-64.flv' in url: return 'flv720', 'flv' @@ -59,7 +60,8 @@ class Bilibili(VideoExtractor): def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs): ts = str(int(time.time())) if not bangumi: - params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts) + #params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts) + params_str = 'appkey=84956560bc028eb7&cid={}&otype=xml&qn={}&quality={}&type='.format(cid, quality, quality) chksum = hashlib.md5(bytes(params_str+self.SEC1, 'utf8')).hexdigest() api_url = self.api_url + params_str + '&sign=' + chksum else: @@ -97,7 +99,7 @@ class Bilibili(VideoExtractor): quality = 'hdflv' if bangumi else 'flv' info_only = kwargs.get('info_only') - for qlt in range(4, -1, -1): + for qlt in [116,112,80,74,64,32,16,15]: api_xml = self.api_req(cid, qlt, bangumi, **kwargs) self.parse_bili_xml(api_xml) if not info_only or stream_id: From d057a49e5b4222cb69b47008cb9e0af1b6b0209f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 24 Apr 2018 14:46:38 +0200 Subject: [PATCH 026/271] [common] url_to_module: quote non-ASCII characters in URL --- src/you_get/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 11200d10..1a6cac2b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1581,6 +1581,9 @@ def url_to_module(url): domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host assert domain, 'unsupported url: ' + url + # all non-ASCII code points must be quoted (percent-encoded UTF-8) + url = ''.join([ch if ord(ch) in range(128) else parse.quote(ch) for ch in url]) + k = r1(r'([^.]+)', domain) if k in SITES: return ( From ff6deaf2bde4a8e81094c7ff5893fa4d9b30efb0 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 24 Apr 2018 14:48:20 +0200 Subject: [PATCH 027/271] [tumblr] fallback to universal_download --- src/you_get/extractors/tumblr.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index 5817e548..fe4973be 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -70,6 +70,11 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): real_url = r1(r']+tumblr_video_container[^>]+>]+src=[\'"]([^\'"]*)[\'"]', html) + + if iframe_url is None: + universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) + return + if iframe_url: iframe_html = get_content(iframe_url, headers=fake_headers) real_url = r1(r']*>[\n ]*]+src=[\'"]([^\'"]*)[\'"]', iframe_html) From 44960677c4c315e479d3b2015582f98f32d40c48 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 25 Apr 2018 22:30:46 +0200 Subject: [PATCH 028/271] [common] use quoted video_host and video_url as well --- src/you_get/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 1a6cac2b..e3000854 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1583,6 +1583,8 @@ def url_to_module(url): # all non-ASCII code points must be quoted (percent-encoded UTF-8) url = ''.join([ch if ord(ch) in range(128) else parse.quote(ch) for ch in url]) + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) k = r1(r'([^.]+)', domain) if k in SITES: From fe34688d07872e18fa0127c969a4f05152e3342a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 25 Apr 2018 22:42:23 +0200 Subject: [PATCH 029/271] [universal] fix my brain damage since 2015 (2c7aa3b) --- src/you_get/extractors/universal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 573d8eea..57994b9c 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -112,7 +112,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg for candy in candies: try: mime, ext, size = url_info(candy['url'], faker=True) - if not size: size = float('Int') + if not size: size = float('Inf') except: continue else: From bcc98c5a5cfae4cd13487f0a51662ede35e746bd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 25 Apr 2018 22:59:39 +0200 Subject: [PATCH 030/271] [universal] use faker only if necessary --- src/you_get/extractors/universal.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 57994b9c..6a1c2d30 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -111,16 +111,25 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg for candy in candies: try: - mime, ext, size = url_info(candy['url'], faker=True) - if not size: size = float('Inf') + try: + mime, ext, size = url_info(candy['url'], faker=False) + assert size + except: + mime, ext, size = url_info(candy['url'], faker=True) + if not size: size = float('Inf') except: continue else: print_info(site_info, candy['title'], ext, size) if not info_only: - download_urls([candy['url']], candy['title'], ext, size, - output_dir=output_dir, merge=merge, - faker=True) + try: + download_urls([candy['url']], candy['title'], ext, size, + output_dir=output_dir, merge=merge, + faker=False) + except: + download_urls([candy['url']], candy['title'], ext, size, + output_dir=output_dir, merge=merge, + faker=True) return else: From 4f1b609d71a04672a2b95b4fb13f0ba486e57df0 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 28 Apr 2018 18:59:52 +0200 Subject: [PATCH 031/271] [bilibili] fix title --- src/you_get/extractors/bilibili.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index d23bbe5c..916782af 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -130,6 +130,9 @@ class Bilibili(VideoExtractor): m = re.search(r'(.*?)', self.page) or re.search(r'

', self.page) if m is not None: self.title = m.group(1) + s = re.search(r'([^<]+)', m.group(1)) + if s: + self.title = unescape_html(s.group(1)) if self.title is None: m = re.search(r'property="og:title" content="([^"]+)"', self.page) if m is not None: From 18d3cf0eb424fa92473141c2af6a9d0183550a72 Mon Sep 17 00:00:00 2001 From: QYLGithub <15058342792@163.com> Date: Sun, 29 Apr 2018 11:38:49 +0800 Subject: [PATCH 032/271] Call toutiao.py method --- src/you_get/extractors/ixigua.py | 98 ++------------------------------ 1 file changed, 5 insertions(+), 93 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 0c668e82..bc19b1d0 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -1,101 +1,13 @@ #!/usr/bin/env python __all__ = ['ixigua_download', 'ixigua_download_playlist'] -import base64 -import random -import binascii -from ..common import * - -headers = { - 'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36' - ' (KHTML, like Gecko) Chrome/61.0.3163.100 Mobile Safari/537.36' -} +from .toutiao import download as toutiao_download +from .toutiao import download_playlist as toutiao_download_playlist -def get_r(): - return str(random.random())[2:] - - -def right_shift(val, n): - return val >> n if val >= 0 else (val + 0x100000000) >> n - - -def get_s(text): - """get video info""" - js_data = json.loads(text) - id = js_data['data']['video_id'] - p = get_r() - url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % id - n = parse.urlparse(url).path + '?r=%s' % p - c = binascii.crc32(n.encode('utf-8')) - s = right_shift(c, 0) - return url + '?r=%s&s=%s' % (p, s), js_data['data']['title'] - - -def get_moment(url, user_id, base_url, video_list): - """Recursively obtaining a video list""" - video_list_data = json.loads(get_content(url, headers=headers)) - if not video_list_data['next']['max_behot_time']: - return video_list - [video_list.append(i["display_url"]) for i in video_list_data["data"]] - max_behot_time = video_list_data['next']['max_behot_time'] - _param = { - 'user_id': user_id, - 'base_url': base_url, - 'video_list': video_list, - 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), - } - return get_moment(**_param) - - -def ixigua_download(url, output_dir='.', info_only=False, **kwargs): - """ Download a single video - Sample URL: https://www.ixigua.com/a6487187567887254029/#mid=59051127876 - """ - try: - video_page_id = re.findall('(\d+)', [i for i in url.split('/') if i][3])[0] if 'toutiao.com' in url \ - else re.findall('(\d+)', [i for i in url.split('/') if i][2])[0] - - video_start_info_url = r'https://m.ixigua.com/i{}/info/'.format(video_page_id) - video_info_url, title = get_s(get_content(video_start_info_url, headers=headers or kwargs.get('headers', {}))) - video_info = json.loads(get_content(video_info_url, headers=headers or kwargs.get('headers', {}))) - except Exception: - raise NotImplementedError(url) - try: - video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"]).decode() - except Exception: - raise NotImplementedError(url) - filetype, ext, size = url_info(video_url, headers=headers or kwargs.get('headers', {})) - print_info(site_info, title, filetype, size) - if not info_only: - _param = { - 'output_dir': output_dir, - 'headers': headers or kwargs.get('headers', {}) - } - download_urls([video_url], title, ext, size, **_param) - - -def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): - """Download all video from the user's video list - Sample URL: https://www.ixigua.com/c/user/71141690831/ - """ - if 'user' not in url: - raise NotImplementedError(url) - user_id = url.split('/')[-2] - max_behot_time = 0 - if not user_id: - raise NotImplementedError(url) - base_url = "https://www.ixigua.com/c/user/article/?user_id={user_id}" \ - "&max_behot_time={max_behot_time}&max_repin_time=0&count=20&page_type=0" - _param = { - 'user_id': user_id, - 'base_url': base_url, - 'video_list': [], - 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), - } - for i in get_moment(**_param): - ixigua_download(i, output_dir, info_only, **kwargs) +def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + return toutiao_download(url.replace('ixigua', '365yg')) site_info = "ixigua.com" download = ixigua_download -download_playlist = ixigua_download_playlist +download_playlist = toutiao_download_playlist \ No newline at end of file From 351173ba797ad1ebc830ed1de223f48c3570248e Mon Sep 17 00:00:00 2001 From: yangxiaochen Date: Thu, 10 May 2018 20:32:59 +0800 Subject: [PATCH 033/271] [qq] fix some error cases("check vid&filename failed" and "format invalid") --- src/you_get/extractors/qq.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 915f1b4b..15116b0c 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -18,11 +18,14 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): host = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] streams = video_json['fl']['fi'] seg_cnt = video_json['vl']['vi'][0]['cl']['fc'] + filename = video_json['vl']['vi'][0]['fn'] if seg_cnt == 0: seg_cnt = 1 + else: + fn_pre, magic_str, video_type = filename.split('.') best_quality = streams[-1]['name'] - part_format_id = streams[-1]['id'] + #part_format_id = streams[-1]['id'] part_urls= [] total_size = 0 @@ -31,7 +34,17 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): # filename = fn_pre + '.mp4' #else: # filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' - filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' + #filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' + + # fix some error cases("check vid&filename failed" and "format invalid") + # https://v.qq.com/x/page/q06058th9ll.html + # https://v.qq.com/x/page/t060789a21e.html + if seg_cnt == 1: + part_format_id = video_json['vl']['vi'][0]['cl']['keyid'].split('.')[-1] + else: + part_format_id = video_json['vl']['vi'][0]['cl']['ci'][part - 1]['keyid'].split('.')[1] + filename = '.'.join([fn_pre, magic_str, str(part), video_type]) + key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename) part_info = get_content(key_api) key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1]) From 24578efa1e34c24c7828d82cc27c70b478e6740a Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Fri, 11 May 2018 12:01:31 +0800 Subject: [PATCH 034/271] repair douyutv 403 error --- src/you_get/extractors/douyutv.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index b7b15e74..72a41a0a 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -9,6 +9,10 @@ import hashlib import time import re +headers = { + 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' + } + def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **kwargs): ep = 'http://vmobile.douyu.com/video/getInfo?vid=' patt = r'show/([0-9A-Za-z]+)' @@ -19,7 +23,7 @@ def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **k log.wtf('Unknown url pattern') vid = hit.group(1) - page = get_content(url) + page = get_content(url, headers=headers) hit = re.search(title_patt, page) if hit is None: title = vid @@ -35,21 +39,18 @@ def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **k urls = general_m3u8_extractor(m3u8_url) download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) -def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + +def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if 'v.douyu.com/show/' in url: douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) return - headers = { - 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' - } - - url = re.sub(r'[w.]*douyu.com','m.douyu.com',url) + url = re.sub(r'[w.]*douyu.com', 'm.douyu.com', url) html = get_content(url, headers) room_id_patt = r'room_id\s*:\s*(\d+),' room_id = match1(html, room_id_patt) if room_id == "0": - room_id = url[url.rfind('/')+1:] + room_id = url[url.rfind('/') + 1:] api_url = "http://www.douyutv.com/api/v1/" args = "room/%s?aid=wp&client_sys=wp&time=%d" % (room_id, int(time.time())) @@ -60,7 +61,7 @@ def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **k content = get_content(json_request_url, headers) json_content = json.loads(content) data = json_content['data'] - server_status = json_content.get('error',0) + server_status = json_content.get('error', 0) if server_status is not 0: raise ValueError("Server returned error:%s" % server_status) @@ -73,7 +74,8 @@ def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **k print_info(site_info, title, 'flv', float('inf')) if not info_only: - download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir = output_dir, merge = merge) + download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) + site_info = "douyu.com" download = douyutv_download From 50bba5527b52121a1f41b75d763a38fe2432e73e Mon Sep 17 00:00:00 2001 From: yangxiaochen Date: Fri, 11 May 2018 17:12:01 +0800 Subject: [PATCH 035/271] [douyin] send the request without fake headers, the douyin website will return fake body or the 403 response! --- src/you_get/extractors/douyin.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/douyin.py b/src/you_get/extractors/douyin.py index 213837e7..20ce0e26 100644 --- a/src/you_get/extractors/douyin.py +++ b/src/you_get/extractors/douyin.py @@ -7,6 +7,7 @@ from ..common import ( url_size, print_info, get_content, + fake_headers, download_urls, playlist_not_supported, ) @@ -16,13 +17,13 @@ __all__ = ['douyin_download_by_url'] def douyin_download_by_url(url, **kwargs): - page_content = get_content(url) + page_content = get_content(url, headers=fake_headers) match_rule = re.compile(r'var data = \[(.*?)\];') video_info = json.loads(match_rule.findall(page_content)[0]) video_url = video_info['video']['play_addr']['url_list'][0] title = video_info['cha_list'][0]['cha_name'] video_format = 'mp4' - size = url_size(video_url) + size = url_size(video_url, faker=True) print_info( site_info='douyin.com', title=title, type=video_format, size=size @@ -30,6 +31,7 @@ def douyin_download_by_url(url, **kwargs): if not kwargs['info_only']: download_urls( urls=[video_url], title=title, ext=video_format, total_size=size, + faker=True, **kwargs ) From daf630e9d782c53878b77b33a891d8003e747a72 Mon Sep 17 00:00:00 2001 From: yangxiaochen Date: Fri, 11 May 2018 18:08:23 +0800 Subject: [PATCH 036/271] [douyin] fix if there is not title, you-get cannot works well https://www.douyin.com/share/video/6553248251821165832 --- src/you_get/extractors/douyin.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/douyin.py b/src/you_get/extractors/douyin.py index 20ce0e26..e39775f4 100644 --- a/src/you_get/extractors/douyin.py +++ b/src/you_get/extractors/douyin.py @@ -21,7 +21,13 @@ def douyin_download_by_url(url, **kwargs): match_rule = re.compile(r'var data = \[(.*?)\];') video_info = json.loads(match_rule.findall(page_content)[0]) video_url = video_info['video']['play_addr']['url_list'][0] - title = video_info['cha_list'][0]['cha_name'] + # fix: https://www.douyin.com/share/video/6553248251821165832 + # if there is no title, use desc + cha_list = video_info['cha_list'] + if cha_list: + title = cha_list[0]['cha_name'] + else: + title = video_info['desc'] video_format = 'mp4' size = url_size(video_url, faker=True) print_info( From 9b03331589e645d76b28fd9021b6d17426186695 Mon Sep 17 00:00:00 2001 From: Zheng Luo Date: Sat, 12 May 2018 20:08:13 -0400 Subject: [PATCH 037/271] Update cccode to 0510 for youku.py --- src/you_get/extractors/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index fc1a5cd2..bfdb014f 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0502' + self.ccode = '0510' # Found in http://g.alicdn.com/player/ykplayer/0.5.28/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' From 9ae300029f5de925be4e1de304e2809ec694d668 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 15 May 2018 21:22:51 +0200 Subject: [PATCH 038/271] version 0.4.1077 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 4d91c55d..64ef890f 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1060' +__version__ = '0.4.1077' From f7179968f8147e304fb61b26e381f28d4da07dff Mon Sep 17 00:00:00 2001 From: cclauss Date: Fri, 18 May 2018 09:08:13 +0200 Subject: [PATCH 039/271] Add flake8 to the testing (again) Another attempt at #2145 --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 2d780e81..ed1531b9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,8 @@ python: - "3.6" - "nightly" - "pypy3" +before_install: pip install flake8 +before_script: flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics script: make test sudo: false notifications: From 59f544665ff89a270c7c1e11f90f423c7690929c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 20 May 2018 13:00:12 +0200 Subject: [PATCH 040/271] [ixigua] remove undefined name (#2599) --- src/you_get/extractors/ixigua.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index bc19b1d0..59133442 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -1,5 +1,6 @@ #!/usr/bin/env python -__all__ = ['ixigua_download', 'ixigua_download_playlist'] +__all__ = ['ixigua_download'] + from .toutiao import download as toutiao_download from .toutiao import download_playlist as toutiao_download_playlist @@ -10,4 +11,4 @@ def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): site_info = "ixigua.com" download = ixigua_download -download_playlist = toutiao_download_playlist \ No newline at end of file +download_playlist = toutiao_download_playlist From 582d89e2f268ab1a72d6b065694760097c270702 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 20 May 2018 13:47:28 +0200 Subject: [PATCH 041/271] .travis.yml: skip flake8 on python 3.2 --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ed1531b9..9df327b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,8 @@ python: - "nightly" - "pypy3" before_install: pip install flake8 -before_script: flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics +before_script: + - if [[ $TRAVIS_PYTHON_VERSION != '3.2'* ]]; then flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics; fi script: make test sudo: false notifications: From 25b1c25517fbfb71a9e997edb4dad991249da6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E6=9E=97=E6=B3=BD?= <7954178+hanlz@users.noreply.github.com> Date: Mon, 21 May 2018 17:40:56 +0800 Subject: [PATCH 042/271] Move the warning message output to standard error. `ffmpeg -version` gives "ffmpeg version 2.8.14-0ubuntu0.16.04.1 Copyright (c) 2000-2018 the FFmpeg developers" on Ubuntu Xenial, which make int() failed during extracting version code. --- src/you_get/processor/ffmpeg.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 89d53e50..1e3bd7eb 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -3,6 +3,7 @@ import logging import os.path import subprocess +import sys from ..util.strings import parameterize from ..common import print_more_compatible as print @@ -25,8 +26,8 @@ def get_usable_ffmpeg(cmd): try: version = [int(i) for i in vers[2].split('.')] except: - print('It seems that your ffmpeg is a nightly build.') - print('Please switch to the latest stable if merging failed.') + print('It seems that your ffmpeg is a nightly build.', file=sys.stderr) + print('Please switch to the latest stable if merging failed.', file=sys.stderr) version = [1, 0] return cmd, 'ffprobe', version except: From d26482b9a92ccaaecd683dc4fb5f17a6519ce417 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 May 2018 22:59:51 +0200 Subject: [PATCH 043/271] [twitter] prevent mobile redirection --- src/you_get/extractors/twitter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 894439aa..9cc3c5c7 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -19,7 +19,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) url = 'https://' + match1(url, r'//mobile\.(.+)') if re.match(r'https?://twitter\.com/i/moments/', url): # moments - html = get_html(url) + html = get_html(url, faker=True) paths = re.findall(r'data-permalink-path="([^"]+)"', html) for path in paths: twitter_download('https://twitter.com' + path, @@ -29,7 +29,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) **kwargs) return - html = get_html(url) + html = get_html(url, faker=True) screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ r1(r' Date: Sun, 27 May 2018 19:16:31 +0200 Subject: [PATCH 044/271] [instagram] download video_url --- src/you_get/extractors/instagram.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index e06eba00..332d9b61 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -27,6 +27,8 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg for edge in edges: title = edge['node']['shortcode'] image_url = edge['node']['display_url'] + if 'video_url' in edge['node']: + image_url = edge['node']['video_url'] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) @@ -39,6 +41,8 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg else: title = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['shortcode'] image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url'] + if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']: + image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) From b7603a4db0cbe1578f471da946cfa07c2de01736 Mon Sep 17 00:00:00 2001 From: "Agent Fitz ;-)" Date: Tue, 29 May 2018 21:36:29 +0800 Subject: [PATCH 045/271] =?UTF-8?q?=E8=85=BE=E8=AE=AF=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E9=BB=98=E8=AE=A4=E4=B8=8B=E8=BD=BDSHD=E8=A7=86=E9=A2=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 现在可以直接下载720P(SHD)的腾讯视频 --- src/you_get/extractors/qq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 15116b0c..4a67c57c 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -9,7 +9,7 @@ from .qie_video import download_by_url as qie_video_download from urllib.parse import urlparse,parse_qs def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): - info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&vid={}'.format(vid) + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&defn=shd&vid={}'.format(vid) info = get_content(info_api) video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) From 3653420fe9965df30e63e7ed0495d58fa4538195 Mon Sep 17 00:00:00 2001 From: kiss4u Date: Sat, 2 Jun 2018 23:15:44 +0800 Subject: [PATCH 046/271] fix some url format from v.qq.com https://v.qq.com/x/page/w0674l9yrrh.html http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr --- src/you_get/extractors/acfun.py | 2 +- src/you_get/extractors/bilibili.py | 2 +- src/you_get/extractors/qq.py | 29 +++++++++++++++++++++++------ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index c521422f..4b45c5e9 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -65,7 +65,7 @@ def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=Fals elif sourceType == 'tudou': tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'qq': - qq_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'letv': letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'zhuzhan': diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 916782af..9e2b8bc0 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -169,7 +169,7 @@ class Bilibili(VideoExtractor): tc_flashvars = tc_flashvars.group(1) if tc_flashvars is not None: self.out = True - qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) + qq_download_by_vid(tc_flashvars, self.title, True, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) return has_plist = re.search(r'"page":2', self.page) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 15116b0c..60fb751f 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -8,8 +8,14 @@ from .qie import download as qieDownload from .qie_video import download_by_url as qie_video_download from urllib.parse import urlparse,parse_qs -def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): - info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&vid={}'.format(vid) +def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, info_only=False): + + if default_from: + platform = 11 + else: + platform = 4100201 + + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&vid={}'.format(platform, vid) info = get_content(info_api) video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) @@ -17,7 +23,8 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): title = video_json['vl']['vi'][0]['ti'] host = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] streams = video_json['fl']['fi'] - seg_cnt = video_json['vl']['vi'][0]['cl']['fc'] + seg_cnt = fc_cnt = video_json['vl']['vi'][0]['cl']['fc'] + filename = video_json['vl']['vi'][0]['fn'] if seg_cnt == 0: seg_cnt = 1 @@ -39,7 +46,10 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): # fix some error cases("check vid&filename failed" and "format invalid") # https://v.qq.com/x/page/q06058th9ll.html # https://v.qq.com/x/page/t060789a21e.html - if seg_cnt == 1: + + if fc_cnt == 0: + # fix jason error + # https://v.qq.com/x/page/w0674l9yrrh.html part_format_id = video_json['vl']['vi'][0]['cl']['keyid'].split('.')[-1] else: part_format_id = video_json['vl']['vi'][0]['cl']['ci'][part - 1]['keyid'].split('.')[1] @@ -112,6 +122,8 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption= def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """""" + default_from = True + if re.match(r'https?://egame.qq.com/live\?anchorid=(\d+)', url): from . import qq_egame qq_egame.qq_egame_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) @@ -134,7 +146,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): content = get_content(url) vids = matchall(content, [r'\?vid=(\w+)']) for vid in vids: - qq_download_by_vid(vid, vid, output_dir, merge, info_only) + qq_download_by_vid(vid, vid, default_from, output_dir, merge, info_only) return if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): @@ -165,7 +177,12 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): title = match1(content, r'"title":"([^"]+)"') if not title else title title = vid if not title else title #general fallback - qq_download_by_vid(vid, title, output_dir, merge, info_only) + if 'v.sports.qq.com' in url: + # fix url forbidden + # http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr + default_from = False + + qq_download_by_vid(vid, title, default_from, output_dir, merge, info_only) site_info = "QQ.com" download = qq_download From 928c8ccbe356800e582c8f0d60901da555ef7631 Mon Sep 17 00:00:00 2001 From: kiss4u Date: Sun, 3 Jun 2018 09:10:46 +0800 Subject: [PATCH 047/271] modify comments --- src/you_get/extractors/qq.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 60fb751f..c3c653a8 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -13,6 +13,7 @@ def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, inf if default_from: platform = 11 else: + # fix return {,"msg":"cannot play outside"} platform = 4100201 info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&vid={}'.format(platform, vid) @@ -32,24 +33,13 @@ def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, inf fn_pre, magic_str, video_type = filename.split('.') best_quality = streams[-1]['name'] - #part_format_id = streams[-1]['id'] part_urls= [] total_size = 0 for part in range(1, seg_cnt+1): - #if seg_cnt == 1 and video_json['vl']['vi'][0]['vh'] <= 480: - # filename = fn_pre + '.mp4' - #else: - # filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' - #filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' - - # fix some error cases("check vid&filename failed" and "format invalid") - # https://v.qq.com/x/page/q06058th9ll.html - # https://v.qq.com/x/page/t060789a21e.html - if fc_cnt == 0: - # fix jason error - # https://v.qq.com/x/page/w0674l9yrrh.html + # fix json parsing error + # example:https://v.qq.com/x/page/w0674l9yrrh.html part_format_id = video_json['vl']['vi'][0]['cl']['keyid'].split('.')[-1] else: part_format_id = video_json['vl']['vi'][0]['cl']['ci'][part - 1]['keyid'].split('.')[1] @@ -179,7 +169,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if 'v.sports.qq.com' in url: # fix url forbidden - # http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr + # example:http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr default_from = False qq_download_by_vid(vid, title, default_from, output_dir, merge, info_only) From a10c98c5ab9aa5dc37e0c1baa652a9f456b2ca25 Mon Sep 17 00:00:00 2001 From: kiss4u Date: Mon, 4 Jun 2018 00:43:15 +0800 Subject: [PATCH 048/271] support for zhibo.tv example: http://v.zhibo.tv/31609372 example: http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html --- README.md | 1 + src/you_get/common.py | 7 ++-- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/zhibo.py | 52 ++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 src/you_get/extractors/zhibo.py diff --git a/README.md b/README.md index 86c5e4e9..f6f8efdc 100644 --- a/README.md +++ b/README.md @@ -416,6 +416,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 西瓜视频 | |✓| | | | 快手 | |✓|✓| | | 抖音 | |✓| | | +| 中国体育(TV) |
|✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. diff --git a/src/you_get/common.py b/src/you_get/common.py index e3000854..30e533f1 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -24,6 +24,7 @@ sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8') SITES = { '163' : 'netease', '56' : 'w56', + '365yg' : 'toutiao', 'acfun' : 'acfun', 'archive' : 'archive', 'baidu' : 'baidu', @@ -64,6 +65,7 @@ SITES = { 'iqiyi' : 'iqiyi', 'ixigua' : 'ixigua', 'isuntv' : 'suntv', + 'iwara' : 'iwara', 'joy' : 'joy', 'kankanews' : 'bilibili', 'khanacademy' : 'khan', @@ -82,6 +84,7 @@ SITES = { 'mixcloud' : 'mixcloud', 'mtv81' : 'mtv81', 'musicplayon' : 'musicplayon', + 'miaopai' : 'yixia', 'naver' : 'naver', '7gogo' : 'nanagogo', 'nicovideo' : 'nicovideo', @@ -118,14 +121,12 @@ SITES = { 'xiaojiadianvideo' : 'fc2video', 'ximalaya' : 'ximalaya', 'yinyuetai' : 'yinyuetai', - 'miaopai' : 'yixia', 'yizhibo' : 'yizhibo', 'youku' : 'youku', - 'iwara' : 'iwara', 'youtu' : 'youtube', 'youtube' : 'youtube', 'zhanqi' : 'zhanqi', - '365yg' : 'toutiao', + 'zhibo' : 'zhibo', } dry_run = False diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index ec9e86ae..649a911f 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -88,3 +88,4 @@ from .ted import * from .khan import * from .zhanqi import * from .kuaishou import * +from .zhibo import * \ No newline at end of file diff --git a/src/you_get/extractors/zhibo.py b/src/you_get/extractors/zhibo.py new file mode 100644 index 00000000..4aaa293e --- /dev/null +++ b/src/you_get/extractors/zhibo.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +__all__ = ['zhibo_download'] + +from ..common import * + +def zhibo_vedio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + # http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html + + html = get_html(url) + title = r1(r'([\s\S]*)', html) + total_size = 0 + part_urls= [] + + video_html = r1(r'', html) + + # video_guessulike = r1(r"window.xgData =([s\S'\s\.]*)\'\;[\s\S]*window.vouchData", video_html) + video_url = r1(r"window.vurl = \'([s\S'\s\.]*)\'\;[\s\S]*window.imgurl", video_html) + part_urls.append(video_url) + ext = video_url.split('.')[-1] + + print_info(site_info, title, ext, total_size) + if not info_only: + download_urls(part_urls, title, ext, total_size, output_dir=output_dir, merge=merge) + + +def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + if 'video.zhibo.tv' in url: + zhibo_vedio_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) + return + + # if 'v.zhibo.tv' in url: + # http://v.zhibo.tv/31609372 + html = get_html(url) + title = r1(r'([\s\S]*)', html) + is_live = r1(r"window.videoIsLive=\'([s\S'\s\.]*)\'\;[\s\S]*window.resDomain", html) + if is_live is not "1": + raise ValueError("The live stream is not online! (Errno:%s)" % is_live) + + ourStreamName = r1(r"window.ourStreamName=\'([s\S'\s\.]*)\'\;[\s\S]*window.rtmpDefaultSource", html) + rtmpPollUrl = r1(r"window.rtmpPollUrl=\'([s\S'\s\.]*)\'\;[\s\S]*window.hlsDefaultSource", html) + + #real_url = 'rtmp://220.194.213.56/live.zhibo.tv/8live/' + ourStreamName + real_url = rtmpPollUrl + ourStreamName + + print_info(site_info, title, 'flv', float('inf')) + if not info_only: + download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) + +site_info = "zhibo.tv" +download = zhibo_download +download_playlist = playlist_not_supported('zhibo') From ebbe13e88e78e2f6eff80a495ad5a90580391d49 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 23 Jun 2018 00:29:00 +0200 Subject: [PATCH 049/271] [universal] a URL with space is not a good URL --- src/you_get/extractors/universal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 6a1c2d30..57b9b2d1 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -67,12 +67,12 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg urls = [] for i in media_exts: - urls += re.findall(r'(https?://[^;"\'\\]+' + i + r'[^;"\'\\]*)', page) + urls += re.findall(r'(https?://[^ ;"\'\\]+' + i + r'[^ ;"\'\\]*)', page) p_urls = re.findall(r'(https?%3A%2F%2F[^;&]+' + i + r'[^;&]*)', page) urls += [parse.unquote(url) for url in p_urls] - q_urls = re.findall(r'(https?:\\\\/\\\\/[^;"\']+' + i + r'[^;"\']*)', page) + q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\']+' + i + r'[^ ;"\']*)', page) urls += [url.replace('\\\\/', '/') for url in q_urls] # a link href to an image is often an interesting one From 52e6a7482d99bc5d928f81e27309ff964c17c7a7 Mon Sep 17 00:00:00 2001 From: Justlearnm0re Date: Sun, 24 Jun 2018 17:41:58 +0800 Subject: [PATCH 050/271] fix cid match The old regex is broken, fix it with new one. --- src/you_get/extractors/bilibili.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 9e2b8bc0..523abbdb 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -177,7 +177,9 @@ class Bilibili(VideoExtractor): log.w('This page contains a playlist. (use --playlist to download all videos.)') try: - cid = re.search(r'cid=(\d+)', self.page).group(1) + page_list = json.loads(re.search(r'"pages":(\[.*?\])', self.page).group(1)) + index_id = int(re.search(r'index_(\d+)', self.url).group(1)) + cid = page_list[index_id-1]['cid'] # change cid match rule except: cid = re.search(r'"cid":(\d+)', self.page).group(1) if cid is not None: From 1adb799c869c50ae19c643db9091912dbb68bac1 Mon Sep 17 00:00:00 2001 From: Justsoos Date: Mon, 25 Jun 2018 16:55:13 +0800 Subject: [PATCH 051/271] Update douyutv.py douyu update --- src/you_get/extractors/douyutv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index 72a41a0a..bdcea458 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -45,9 +45,9 @@ def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs) douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) return - url = re.sub(r'[w.]*douyu.com', 'm.douyu.com', url) + url = re.sub(r'.*douyu.com','https://m.douyu.com/room', url) html = get_content(url, headers) - room_id_patt = r'room_id\s*:\s*(\d+),' + room_id_patt = r'"rid"\s*:\s*(\d+),' room_id = match1(html, room_id_patt) if room_id == "0": room_id = url[url.rfind('/') + 1:] From 4f00ca5b8da7dcaf51a3d5f91e168c53f7efd156 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 22:18:27 +0200 Subject: [PATCH 052/271] [bilibili] warn when target URL is a playlist and --playlist is not used --- src/you_get/extractors/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 523abbdb..cd71b071 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -173,7 +173,7 @@ class Bilibili(VideoExtractor): return has_plist = re.search(r'"page":2', self.page) - if has_plist: + if has_plist and not kwargs.get('playlist'): log.w('This page contains a playlist. (use --playlist to download all videos.)') try: @@ -341,6 +341,7 @@ def parse_cid_playurl(xml): def bilibili_download_playlist_by_url(url, **kwargs): url = url_locations([url])[0] + kwargs['playlist'] = True # a bangumi here? possible? if 'live.bilibili' in url: site.download_by_url(url) From 93c08277d20b373d270b488bf1e11d20dda7e17e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 22:50:10 +0200 Subject: [PATCH 053/271] [bilibili] better subtitle with page no --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index cd71b071..93749596 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -360,7 +360,7 @@ def bilibili_download_playlist_by_url(url, **kwargs): page_cnt = len(page_list) for no in range(1, page_cnt+1): page_url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, no) - subtitle = page_list[no-1]['pagename'] + subtitle = '#%s. %s'% (page_list[no-1]['page'], page_list[no-1]['pagename']) Bilibili().download_by_url(page_url, subtitle=subtitle, **kwargs) site = Bilibili() From 1171be87c7dc2d2ae66c1c37d02ae9723fe414e5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 23:21:42 +0200 Subject: [PATCH 054/271] [baidu] squanch this --- src/you_get/extractors/baidu.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 6f558e31..d0146217 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,6 +129,15 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) + vhsrc = re.findall(r'vhsrc="([^"]+)"', html) + if vhsrc is not None: + ext = 'mp4' + size = url_size(vhsrc[0]) + print_info(site_info, title, ext, size) + if not info_only: + download_urls(vhsrc, title, ext, size, + output_dir=output_dir, merge=False) + items = re.findall( r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i From 18af8f36e2e9a12761ae582527818b8af3c3e891 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 23:35:15 +0200 Subject: [PATCH 055/271] version 0.4.1099 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 64ef890f..9f3287d9 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1077' +__version__ = '0.4.1099' From 4a3f1b5bd2c87d43667730578df12ac121d11322 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 29 Jun 2018 15:04:42 +0200 Subject: [PATCH 056/271] [miaopai] squanch that --- src/you_get/extractors/yixia.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py index ccaaf546..ff45730d 100644 --- a/src/you_get/extractors/yixia.py +++ b/src/you_get/extractors/yixia.py @@ -51,10 +51,10 @@ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwa yixia_download_by_scid = yixia_miaopai_download_by_scid site_info = "Yixia Miaopai" - scid = match1(url, r'miaopai\.com/show/channel/(.+)\.htm') or \ - match1(url, r'miaopai\.com/show/(.+)\.htm') or \ - match1(url, r'm\.miaopai\.com/show/channel/(.+)\.htm') or \ - match1(url, r'm\.miaopai\.com/show/channel/(.+)') + scid = match1(url, r'miaopai\.com/show/channel/([^.]+)\.htm') or \ + match1(url, r'miaopai\.com/show/([^.]+)\.htm') or \ + match1(url, r'm\.miaopai\.com/show/channel/([^.]+)\.htm') or \ + match1(url, r'm\.miaopai\.com/show/channel/([^.]+)') elif 'xiaokaxiu.com' in hostname: #Xiaokaxiu yixia_download_by_scid = yixia_xiaokaxiu_download_by_scid From 503ff846f77225ee373f58c016da6428d8d5a2b7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 13:23:48 +0200 Subject: [PATCH 057/271] [common] do not coerce headers into a dict --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 30e533f1..73192e61 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -505,7 +505,7 @@ def get_head(url, headers={}, get_method='HEAD'): req = request.Request(url) req.get_method = lambda: get_method res = urlopen_with_retry(req) - return dict(res.headers) + return res.headers def url_info(url, faker=False, headers={}): From 37e2a798972d2ad7d73abfc86ab7c54a2f42a077 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 13:48:22 +0200 Subject: [PATCH 058/271] [common] wubba lubba dub dub --- src/you_get/common.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 73192e61..f4c37f71 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1594,15 +1594,8 @@ def url_to_module(url): url ) else: - import http.client - video_host = r1(r'https?://([^/]+)/', url) # .cn could be removed - if url.startswith('https://'): - conn = http.client.HTTPSConnection(video_host) - else: - conn = http.client.HTTPConnection(video_host) - conn.request('HEAD', video_url, headers=fake_headers) - res = conn.getresponse() - location = res.getheader('location') + location = get_location(url) + if location and location != url and not location.startswith('/'): return url_to_module(location) else: From 3e8927959836b96982a06fe922a0946cfca52ed2 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 15:47:54 +0200 Subject: [PATCH 059/271] [common] make get_location accept headers --- src/you_get/common.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index f4c37f71..c8b0f80b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -369,13 +369,16 @@ def get_decoded_html(url, faker=False): return data -def get_location(url): +def get_location(url, headers=None, get_method='HEAD'): logging.debug('get_location: %s' % url) - response = request.urlopen(url) - # urllib will follow redirections and it's too much code to tell urllib - # not to do that - return response.geturl() + if headers: + req = request.Request(url, headers=headers) + else: + req = request.Request(url) + req.get_method = lambda: get_method + res = urlopen_with_retry(req) + return res.geturl() def urlopen_with_retry(*args, **kwargs): @@ -1594,7 +1597,10 @@ def url_to_module(url): url ) else: - location = get_location(url) + try: + location = get_location(url) # t.co isn't happy with fake_headers + except: + location = get_location(url, headers=fake_headers) if location and location != url and not location.startswith('/'): return url_to_module(location) From d503237508167e761f835045d36cdec0a928d31f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 15:48:22 +0200 Subject: [PATCH 060/271] [common] dumb --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index c8b0f80b..97bc93a5 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -499,7 +499,7 @@ def urls_size(urls, faker=False, headers={}): return sum([url_size(url, faker=faker, headers=headers) for url in urls]) -def get_head(url, headers={}, get_method='HEAD'): +def get_head(url, headers=None, get_method='HEAD'): logging.debug('get_head: %s' % url) if headers: From 9ab4bfbf3e4ef99bd67c2bcba4eaa4398edc1cbd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 15:50:08 +0200 Subject: [PATCH 061/271] [common] update UA --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 97bc93a5..b19d602f 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -143,7 +143,7 @@ fake_headers = { 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0', # noqa + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0', # noqa } if sys.stdout.isatty(): From e635628639cc6f709dbc84dd8e45c3bdcef7758a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 3 Jul 2018 14:49:51 +0200 Subject: [PATCH 062/271] [instagram] no more ? --- src/you_get/extractors/instagram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 332d9b61..65fc01f5 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -29,6 +29,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = edge['node']['display_url'] if 'video_url' in edge['node']: image_url = edge['node']['video_url'] + image_url = image_url.split('?')[0] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) @@ -43,6 +44,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url'] if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']: image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'] + image_url = image_url.split('?')[0] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) From 196e94bdfff870e795b6aab5618c33166378fe4a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 12 Jul 2018 01:03:01 +0200 Subject: [PATCH 063/271] [baidu] because the fleeb has all of the fleeb juice --- src/you_get/extractors/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index d0146217..c9d64547 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,7 +129,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) - vhsrc = re.findall(r'vhsrc="([^"]+)"', html) + vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) if vhsrc is not None: ext = 'mp4' size = url_size(vhsrc[0]) From a07ba1a5dfc8f0a46b957426a1dc528a90c589b8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 14 Jul 2018 14:54:21 +0200 Subject: [PATCH 064/271] [baidu] it's important that the fleeb is rubbed --- src/you_get/extractors/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index c9d64547..1392e7de 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,7 +129,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) - vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) + vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) if vhsrc is not None: ext = 'mp4' size = url_size(vhsrc[0]) From 50216593e439b6e940e868a9f98c4475ee3636f5 Mon Sep 17 00:00:00 2001 From: Fangzhou Li Date: Mon, 16 Jul 2018 04:22:13 +0800 Subject: [PATCH 065/271] [util] improve compatibility with WSL --- src/you_get/util/fs.py | 8 ++++---- src/you_get/util/os.py | 30 ++++++++++++++++++++++++++++++ tests/test_util.py | 7 ++++--- 3 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 src/you_get/util/os.py diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index d49a117d..b6b7069a 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -import platform +from .os import detect_os -def legitimize(text, os=platform.system()): +def legitimize(text, os=detect_os()): """Converts a string to a valid filename. """ @@ -13,7 +13,7 @@ def legitimize(text, os=platform.system()): ord('|'): '-', }) - if os == 'Windows': + if os == 'windows' or os == 'cygwin' or os == 'wsl': # Windows (non-POSIX namespace) text = text.translate({ # Reserved in Windows VFAT and NTFS @@ -31,7 +31,7 @@ def legitimize(text, os=platform.system()): }) else: # *nix - if os == 'Darwin': + if os == 'mac': # Mac OS HFS+ text = text.translate({ ord(':'): '-', diff --git a/src/you_get/util/os.py b/src/you_get/util/os.py new file mode 100644 index 00000000..11730e28 --- /dev/null +++ b/src/you_get/util/os.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +from platform import system + +def detect_os(): + """Detect operating system. + """ + + # Inspired by: + # https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py + + syst = system().lower() + os = 'unknown' + + if 'cygwin' in syst: + os = 'cygwin' + elif 'darwin' in syst: + os = 'mac' + elif 'linux' in syst: + os = 'linux' + # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423 + with open('/proc/version', 'r') as f: + if 'microsoft' in f.read().lower(): + os = 'wsl' + elif 'windows' in syst: + os = 'windows' + elif 'bsd' in syst: + os = 'bsd' + + return os diff --git a/tests/test_util.py b/tests/test_util.py index 239083bc..88743b03 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -6,6 +6,7 @@ from you_get.util.fs import * class TestUtil(unittest.TestCase): def test_legitimize(self): - self.assertEqual(legitimize("1*2", os="Linux"), "1*2") - self.assertEqual(legitimize("1*2", os="Darwin"), "1*2") - self.assertEqual(legitimize("1*2", os="Windows"), "1-2") + self.assertEqual(legitimize("1*2", os="linux"), "1*2") + self.assertEqual(legitimize("1*2", os="mac"), "1*2") + self.assertEqual(legitimize("1*2", os="windows"), "1-2") + self.assertEqual(legitimize("1*2", os="wsl"), "1-2") From ddf67aadb5f90ececd2246c7a6302a66b630eeac Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 17 Jul 2018 17:59:31 +0200 Subject: [PATCH 066/271] [baidu] squanch this, mofo --- src/you_get/extractors/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 1392e7de..b30c9d86 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,7 +129,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) - vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) + vhsrc = re.findall(r'"BDE_Image"[^>]+src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) if vhsrc is not None: ext = 'mp4' size = url_size(vhsrc[0]) From 80aa34f538f52f60484609405d07fc62377827bc Mon Sep 17 00:00:00 2001 From: ellipse42 Date: Sat, 21 Jul 2018 06:26:19 +0800 Subject: [PATCH 067/271] [miaopai] support not fixed length fid --- src/you_get/extractors/miaopai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py index 6d9a79c7..f37d45b0 100644 --- a/src/you_get/extractors/miaopai.py +++ b/src/you_get/extractors/miaopai.py @@ -31,7 +31,7 @@ def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = Fa #---------------------------------------------------------------------- def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): - fid = match1(url, r'\?fid=(\d{4}:\w{32})') + fid = match1(url, r'\?fid=(\d{4}:\w+)') if fid is not None: miaopai_download_by_fid(fid, output_dir, merge, info_only) elif '/p/230444' in url: From feffd883ea30a4b8f6af9cbd3e631489e051919c Mon Sep 17 00:00:00 2001 From: hellsof Date: Sat, 28 Jul 2018 13:51:43 +0800 Subject: [PATCH 068/271] support view.inews.qq.com/a/20180521V0Z9MH00 https://kuaibao.qq.com/s/20180521V0Z9MH00 https://v.qq.com/x/cover/t0fqsm1y83r8v5j/a0026nvw5jr.html --- src/you_get/extractors/qq.py | 51 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 7f2f4acc..e39bf2e3 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -2,28 +2,25 @@ __all__ = ['qq_download'] -from ..common import * -from ..util.log import * from .qie import download as qieDownload from .qie_video import download_by_url as qie_video_download -from urllib.parse import urlparse,parse_qs +from ..common import * -def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, info_only=False): - if default_from: - platform = 11 - else: - # fix return {,"msg":"cannot play outside"} - platform = 4100201 - - info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&defn=shd&vid={}'.format(vid) - info = get_content(info_api) - video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) +def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): + # http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr https://v.qq.com/x/cover/t0fqsm1y83r8v5j/a0026nvw5jr.html + video_json = None + platforms = [4100201, 11] + for platform in platforms: + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&defn=shd&vid={}'.format(platform, vid) + info = get_content(info_api) + video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) + if not video_json.get('msg')=='cannot play outside': + break fn_pre = video_json['vl']['vi'][0]['lnk'] title = video_json['vl']['vi'][0]['ti'] host = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] - streams = video_json['fl']['fi'] seg_cnt = fc_cnt = video_json['vl']['vi'][0]['cl']['fc'] filename = video_json['vl']['vi'][0]['fn'] @@ -32,8 +29,6 @@ def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, inf else: fn_pre, magic_str, video_type = filename.split('.') - best_quality = streams[-1]['name'] - part_urls= [] total_size = 0 for part in range(1, seg_cnt+1): @@ -112,7 +107,6 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption= def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """""" - default_from = True if re.match(r'https?://egame.qq.com/live\?anchorid=(\d+)', url): from . import qq_egame @@ -136,10 +130,18 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): content = get_content(url) vids = matchall(content, [r'\?vid=(\w+)']) for vid in vids: - qq_download_by_vid(vid, vid, default_from, output_dir, merge, info_only) + qq_download_by_vid(vid, vid, output_dir, merge, info_only) return - if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): + if 'kuaibao.qq.com/s/' in url: + # https://kuaibao.qq.com/s/20180521V0Z9MH00 + nid = match1(url, r'/s/([^/&?#]+)') + content = get_content('https://kuaibao.qq.com/getVideoRelate?id=' + nid) + info_json = json.loads(content) + vid=info_json['videoinfo']['vid'] + title=info_json['videoinfo']['title'] + elif 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): + # http://daxue.qq.com/content/content/id/2321 content = get_content(url) vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') title = match1(content, r'title">([^"]+)

') @@ -148,6 +150,11 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): vid = match1(url, r'\bvid=(\w+)') # for embedded URLs; don't know what the title is title = vid + elif 'view.inews.qq.com' in url: + # view.inews.qq.com/a/20180521V0Z9MH00 + content = get_content(url) + vid = match1(content, r'"vid":"(\w+)"') + title = match1(content, r'"title":"(\w+)"') else: content = get_content(url) #vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5 @@ -167,12 +174,8 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): title = match1(content, r'"title":"([^"]+)"') if not title else title title = vid if not title else title #general fallback - if 'v.sports.qq.com' in url: - # fix url forbidden - # example:http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr - default_from = False - qq_download_by_vid(vid, title, default_from, output_dir, merge, info_only) + qq_download_by_vid(vid, title, output_dir, merge, info_only) site_info = "QQ.com" download = qq_download From 506c5b5bc12f3b3597354cb31212b305227462c5 Mon Sep 17 00:00:00 2001 From: ehds Date: Sun, 29 Jul 2018 14:03:10 +0800 Subject: [PATCH 069/271] upadate kugou api --- src/you_get/extractors/kugou.py | 63 ++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/src/you_get/extractors/kugou.py b/src/you_get/extractors/kugou.py index 925bdf1c..a98daac6 100644 --- a/src/you_get/extractors/kugou.py +++ b/src/you_get/extractors/kugou.py @@ -20,32 +20,69 @@ def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs): print_info(site_info, title, songtype, size) if not info_only: download_urls([url], title, ext, size, output_dir, merge=merge) + elif url.lower().find("hash")!=-1: + return kugou_download_by_hash(url,output_dir,merge,info_only) else: #for the www.kugou.com/ return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) # raise NotImplementedError(url) -def kugou_download_by_hash(title,hash_val,output_dir = '.', merge = True, info_only = False): + +def kugou_download_by_hash(url,output_dir = '.', merge = True, info_only = False): #sample - #url_sample:http://www.kugou.com/yy/album/single/536957.html - #hash ->key md5(hash+kgcloud")->key decompile swf - #cmd 4 for mp3 cmd 3 for m4a - key=hashlib.new('md5',(hash_val+"kgcloud").encode("utf-8")).hexdigest() - html=get_html("http://trackercdn.kugou.com/i/?pid=6&key=%s&acceptMp3=1&cmd=4&hash=%s"%(key,hash_val)) - j=loads(html) - url=j['url'] + #url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462 + hash_val = match1(url,'hash=(\w+)') + album_id = match1(url,'album_id=(\d+)') + html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}".format(hash_val,album_id)) + j =loads(html) + url = j['data']['play_url'] + title = j['data']['audio_name'] + # some songs cann't play because of copyright protection + if(url == ''): + return songtype, ext, size = url_info(url) print_info(site_info, title, songtype, size) if not info_only: download_urls([url], title, ext, size, output_dir, merge=merge) def kugou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs): - html=get_html(url) - pattern=re.compile('title="(.*?)".* data="(\w*)\|.*?"') - pairs=pattern.findall(html) - for title,hash_val in pairs: - kugou_download_by_hash(title,hash_val,output_dir,merge,info_only) + urls=[] + + #download music leaderboard + #sample: http://www.kugou.com/yy/html/rank.html + if url.lower().find('rank') !=-1: + html=get_html(url) + pattern = re.compile(' Date: Sat, 4 Aug 2018 10:15:59 +0800 Subject: [PATCH 070/271] [bilibili] add subtitle for downloading single episode --- src/you_get/extractors/bilibili.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 93749596..103c5c95 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -137,9 +137,18 @@ class Bilibili(VideoExtractor): m = re.search(r'property="og:title" content="([^"]+)"', self.page) if m is not None: self.title = m.group(1) + if 'subtitle' in kwargs: subtitle = kwargs['subtitle'] self.title = '{} {}'.format(self.title, subtitle) + else: + m_pages = re.search(r'"pages":(\[[^\]]+])', self.page) + if m_pages is not None: + pages = json.loads(m_pages.group(1)) + if len(pages) > 1: + qs = dict(parse.parse_qsl(urllib.parse.urlparse(self.url).query)) + page = pages[int(qs.get('p', 1)) - 1] + self.title = '{} #{}. {}'.format(self.title, page['page'], page['part']) if 'bangumi.bilibili.com/movie' in self.url: self.movie_entry(**kwargs) From da8c982608c9308765e0960e08fc28cccb74b215 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Aug 2018 16:21:49 +0200 Subject: [PATCH 071/271] [twitter] fix #2609 --- src/you_get/extractors/twitter.py | 50 +++++++++++-------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 9cc3c5c7..8ed400db 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -64,42 +64,26 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) output_dir=output_dir) except: # extract video - # always use i/cards or videos url - if not re.match(r'https?://twitter.com/i/', url): - url = r1(r'\s*', vmap) - item_id = i['tweet_id'] - page_title = "{} [{}]".format(screen_name, item_id) - elif 'scribe_playlist_url' in i: - scribe_playlist_url = i['scribe_playlist_url'] - return vine_download(scribe_playlist_url, output_dir, merge=merge, info_only=info_only) + ga_url = 'https://api.twitter.com/1.1/guest/activate.json' + ga_content = post_content(ga_url, headers={'authorization': authorization}) + guest_token = json.loads(ga_content)['guest_token'] - try: - urls = extract_m3u(source) - except: - urls = [source] + api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id + api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token}) + + info = json.loads(api_content) + variants = info['globalObjects']['tweets'][item_id]['extended_entities']['media'][0]['video_info']['variants'] + variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0)) + urls = [ variants[-1]['url'] ] size = urls_size(urls) - mime, ext = 'video/mp4', 'mp4' + mime, ext = variants[-1]['content_type'], 'mp4' print_info(site_info, page_title, mime, size) if not info_only: From 711b3621d0a03069ddd8332177685a0ed3bcc9fd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Aug 2018 16:26:22 +0200 Subject: [PATCH 072/271] version 0.4.1118 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 9f3287d9..93d636e8 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1099' +__version__ = '0.4.1118' From 0ce55a2cf6f1feba51768c57887d952f53580005 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Aug 2018 17:06:41 +0200 Subject: [PATCH 073/271] [bilibili] nah (#2627) --- src/you_get/extractors/bilibili.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 103c5c95..f3115ed2 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -142,9 +142,9 @@ class Bilibili(VideoExtractor): subtitle = kwargs['subtitle'] self.title = '{} {}'.format(self.title, subtitle) else: - m_pages = re.search(r'"pages":(\[[^\]]+])', self.page) - if m_pages is not None: - pages = json.loads(m_pages.group(1)) + playinfo = re.search(r'__INITIAL_STATE__=(.*?);\(function\(\)', self.page) + if playinfo is not None: + pages = json.loads(playinfo.group(1))['videoData']['pages'] if len(pages) > 1: qs = dict(parse.parse_qsl(urllib.parse.urlparse(self.url).query)) page = pages[int(qs.get('p', 1)) - 1] From c6d71ddf102af848741c3f5ca97fa46ff07d8806 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Aug 2018 17:12:11 +0200 Subject: [PATCH 074/271] version 0.4.1120 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 93d636e8..93a759de 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1118' +__version__ = '0.4.1120' From 3a41c3d22c52aa5a08b63fdc1335d6b1d1b0b4fa Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 10 Aug 2018 03:05:14 +0200 Subject: [PATCH 075/271] [processor.ffmpeg] shut the f up --- src/you_get/processor/ffmpeg.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 1e3bd7eb..a18188da 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -22,12 +22,10 @@ def get_usable_ffmpeg(cmd): out, err = p.communicate() vers = str(out, 'utf-8').split('\n')[0].split() assert (vers[0] == 'ffmpeg' and vers[2][0] > '0') or (vers[0] == 'avconv') - #set version to 1.0 for nightly build and print warning try: - version = [int(i) for i in vers[2].split('.')] + v = vers[2][1:] if vers[2][0] == 'n' else vers[2] + version = [int(i) for i in v.split('.')] except: - print('It seems that your ffmpeg is a nightly build.', file=sys.stderr) - print('Please switch to the latest stable if merging failed.', file=sys.stderr) version = [1, 0] return cmd, 'ffprobe', version except: From b554f9f9bcf8f689a2ddf717361a0676f4cbd039 Mon Sep 17 00:00:00 2001 From: "edward.gao" Date: Fri, 10 Aug 2018 17:26:29 +0800 Subject: [PATCH 076/271] Feature: support download bilibili space favorite folder files --- src/you_get/extractors/bilibili.py | 47 +++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index f3115ed2..e0ebbac8 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -32,13 +32,13 @@ class Bilibili(VideoExtractor): SEC1 = '94aba54af9065f71de72f5508f1cd42e' SEC2 = '9b288147e5474dd2aa67085f716c560d' stream_types = [ - {'id': 'hdflv'}, - {'id': 'flv720'}, - {'id': 'flv'}, - {'id': 'hdmp4'}, - {'id': 'mp4'}, - {'id': 'live'}, - {'id': 'vc'} + {'id': 'hdflv'}, + {'id': 'flv720'}, + {'id': 'flv'}, + {'id': 'hdmp4'}, + {'id': 'mp4'}, + {'id': 'live'}, + {'id': 'vc'} ] fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1) @@ -348,6 +348,36 @@ def parse_cid_playurl(xml): log.w(e) return [], 0 +def download_video_from_favlist(url, **kwargs): + # the url has format: https://space.bilibili.com/64169458/#/favlist?fid=1840028 + + m = re.search(r'space\.bilibili\.com/(\d+)/.*?fid=(\d+).*?', url) + vmid = "" + favid = "" + if m is not None: + vmid = m.group(1) + favid = m.group(2) + jsonresult = json.loads(get_content("https://api.bilibili.com/x/space/fav/arc?vmid={}&ps=300&fid={}&order=fav_time&tid=0&keyword=&pn=1&jsonp=jsonp".format(vmid, favid))) + print(jsonresult) + # log.wtf("Got files list for vmid" + vmid + " favid:" + favid) + if jsonresult['code'] != 0: + log.wtf("Fail to get the files of page " + jsonresult) + sys.exit(2) + + else: + videos = jsonresult['data']['archives'] + videocount = len(videos) + for i in range(videocount): + videoid = videos[i]["aid"] + videotitle = videos[i]["title"] + videourl = "https://www.bilibili.com/video/av{}".format(videoid) + print("Start downloading ", videotitle, " video ", videotitle) + Bilibili().download_by_url(videourl, subtitle=videotitle, **kwargs) + + else: + log.wtf("Fail to parse the fav title" + url, "") + + def bilibili_download_playlist_by_url(url, **kwargs): url = url_locations([url])[0] kwargs['playlist'] = True @@ -363,6 +393,9 @@ def bilibili_download_playlist_by_url(url, **kwargs): for ep_id in ep_ids: ep_url = '#'.join([base_url, ep_id]) Bilibili().download_by_url(ep_url, **kwargs) + elif 'favlist' in url: + # this a fav list folder + download_video_from_favlist(url, **kwargs) else: aid = re.search(r'av(\d+)', url).group(1) page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid))) From 611cb6acdffac4a1760173eecb14e547a0ce586e Mon Sep 17 00:00:00 2001 From: "edward.gao" Date: Fri, 10 Aug 2018 18:15:10 +0800 Subject: [PATCH 077/271] The bilibili got 502 occasionally --- tests/test.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/test.py b/tests/test.py index 6562d7ca..4a402f1e 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import unittest +import urllib from you_get.extractors import ( imgur, @@ -32,12 +33,23 @@ class YouGetTests(unittest.TestCase): ) def test_bilibili(self): - bilibili.download( - 'https://www.bilibili.com/video/av16907446/', info_only=True - ) - bilibili.download( - 'https://www.bilibili.com/video/av13228063/', info_only=True - ) + maxRetry = 3 + + # the bilibi is not stable, it got 502 occasionally + for i in range(maxRetry): + try: + bilibili.download( + 'https://www.bilibili.com/video/av16907446/', info_only=True + ) + bilibili.download( + 'https://www.bilibili.com/video/av13228063/', info_only=True + ) + return + except urllib.error.HTTPError as e: + if e.error == 502 and i + 1 < maxRetry: + continue + else: + raise e if __name__ == '__main__': From 93655cf9f4dbca877752a4febe1ba4acc8daf1c1 Mon Sep 17 00:00:00 2001 From: "edward.gao" Date: Fri, 10 Aug 2018 20:35:55 +0800 Subject: [PATCH 078/271] Reduce logging message --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index e0ebbac8..96fc60c8 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -358,7 +358,7 @@ def download_video_from_favlist(url, **kwargs): vmid = m.group(1) favid = m.group(2) jsonresult = json.loads(get_content("https://api.bilibili.com/x/space/fav/arc?vmid={}&ps=300&fid={}&order=fav_time&tid=0&keyword=&pn=1&jsonp=jsonp".format(vmid, favid))) - print(jsonresult) + # log.wtf("Got files list for vmid" + vmid + " favid:" + favid) if jsonresult['code'] != 0: log.wtf("Fail to get the files of page " + jsonresult) From 170dee568bd5882fff8dfae0ff10f35796fc7b57 Mon Sep 17 00:00:00 2001 From: "edward.gao" Date: Fri, 10 Aug 2018 20:39:25 +0800 Subject: [PATCH 079/271] Remove retry for testing bilibili --- tests/test.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tests/test.py b/tests/test.py index 4a402f1e..6562d7ca 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import unittest -import urllib from you_get.extractors import ( imgur, @@ -33,23 +32,12 @@ class YouGetTests(unittest.TestCase): ) def test_bilibili(self): - maxRetry = 3 - - # the bilibi is not stable, it got 502 occasionally - for i in range(maxRetry): - try: - bilibili.download( - 'https://www.bilibili.com/video/av16907446/', info_only=True - ) - bilibili.download( - 'https://www.bilibili.com/video/av13228063/', info_only=True - ) - return - except urllib.error.HTTPError as e: - if e.error == 502 and i + 1 < maxRetry: - continue - else: - raise e + bilibili.download( + 'https://www.bilibili.com/video/av16907446/', info_only=True + ) + bilibili.download( + 'https://www.bilibili.com/video/av13228063/', info_only=True + ) if __name__ == '__main__': From f80a1dd905f7f91d25568b3344b8e32e0230f6ab Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 10 Aug 2018 16:38:37 +0200 Subject: [PATCH 080/271] [youku] I squanch your ccode (close #2611) --- src/you_get/extractors/youku.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index bfdb014f..d5186328 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,8 +78,8 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0510' - # Found in http://g.alicdn.com/player/ykplayer/0.5.28/youku-player.min.js + self.ccode = '0508' + # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' self.utid = None From a0b97e56e5658635b5c5f2f32e8b8c14bd35aeb3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 10 Aug 2018 16:40:12 +0200 Subject: [PATCH 081/271] version 0.4.1128 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 93a759de..76969dc3 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1120' +__version__ = '0.4.1128' From bda3b940f42bfec967a00317537d4d92870a66d0 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 10 Aug 2018 16:56:51 +0200 Subject: [PATCH 082/271] [tests] screw it --- tests/test.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/test.py b/tests/test.py index 6562d7ca..047cdb0f 100644 --- a/tests/test.py +++ b/tests/test.py @@ -31,14 +31,5 @@ class YouGetTests(unittest.TestCase): info_only=True ) - def test_bilibili(self): - bilibili.download( - 'https://www.bilibili.com/video/av16907446/', info_only=True - ) - bilibili.download( - 'https://www.bilibili.com/video/av13228063/', info_only=True - ) - - if __name__ == '__main__': unittest.main() From 935a4233cda2b506859f51137ab5eed28ddff77c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 15 Aug 2018 13:19:25 +0200 Subject: [PATCH 083/271] [youku] get schwifty --- src/you_get/extractors/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index d5186328..d7c79385 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0508' + self.ccode = '0511' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' From 28e1f6cc15568004f8977ab1cb0f3c171cabeeaf Mon Sep 17 00:00:00 2001 From: Mao Chang <1702190+moaix@users.noreply.github.com> Date: Wed, 15 Aug 2018 22:58:17 +0800 Subject: [PATCH 084/271] fix lizhi --- src/you_get/extractors/lizhi.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py index 65988a9f..4991df31 100644 --- a/src/you_get/extractors/lizhi.py +++ b/src/you_get/extractors/lizhi.py @@ -2,8 +2,17 @@ __all__ = ['lizhi_download'] import json +import datetime from ..common import * +# +# Worked well but not perfect. +# TODO: add option --format={sd|hd} +# +def get_url(ep): + readable = datetime.datetime.fromtimestamp(int(ep['create_time']) / 1000).strftime('%Y/%m/%d') + return 'http://cdn5.lizhi.fm/audio/{}/{}_hd.mp3'.format(readable, ep['id']) + # radio_id: e.g. 549759 from http://www.lizhi.fm/549759/ # # Returns a list of tuples (audio_id, title, url) for each episode @@ -23,7 +32,7 @@ def lizhi_extract_playlist_info(radio_id): # (au_cnt), then handle pagination properly. api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id api_response = json.loads(get_content(api_url)) - return [(ep['id'], ep['name'], ep['url']) for ep in api_response] + return [(ep['id'], ep['name'], get_url(ep)) for ep in api_response] def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False): filetype, ext, size = url_info(url) From 82db2fe8f07e65616d3aff6faf5ec7b61d430534 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 15 Aug 2018 21:42:40 +0200 Subject: [PATCH 085/271] [baidu] you got this --- src/you_get/extractors/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index b30c9d86..65e62098 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -130,7 +130,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= title = r1(r'title:"([^"]+)"', html) vhsrc = re.findall(r'"BDE_Image"[^>]+src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) - if vhsrc is not None: + if len(vhsrc) > 0: ext = 'mp4' size = url_size(vhsrc[0]) print_info(site_info, title, ext, size) From e36404cf2243d8de52062d834f0676a0f95966a7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 25 Aug 2018 15:41:07 +0200 Subject: [PATCH 086/271] [youku] fire in the hole! --- src/you_get/extractors/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index d7c79385..f2e67336 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0511' + self.ccode = '0515' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' From 2a1bb6978c6e58995e89e055bd3a16042f5c5636 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 27 Aug 2018 14:49:37 +0200 Subject: [PATCH 087/271] [twitter] match correct screen_name and item_id in a conversation --- src/you_get/extractors/twitter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 8ed400db..1c027973 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -30,9 +30,9 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) return html = get_html(url, faker=True) - screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ + screen_name = r1(r'twitter\.com/([^/]+)', url) or r1(r'data-screen-name="([^"]*)"', html) or \ r1(r' Date: Wed, 29 Aug 2018 16:44:13 +0200 Subject: [PATCH 088/271] [[util.os] fix Android termux compatibility (no permission to access /proc) --- src/you_get/util/os.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/you_get/util/os.py b/src/you_get/util/os.py index 11730e28..1a00d2b5 100644 --- a/src/you_get/util/os.py +++ b/src/you_get/util/os.py @@ -19,9 +19,11 @@ def detect_os(): elif 'linux' in syst: os = 'linux' # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423 - with open('/proc/version', 'r') as f: - if 'microsoft' in f.read().lower(): - os = 'wsl' + try: + with open('/proc/version', 'r') as f: + if 'microsoft' in f.read().lower(): + os = 'wsl' + except: pass elif 'windows' in syst: os = 'windows' elif 'bsd' in syst: From 9ba7690cb9b33a21ec3e068d8b652b2c59d12797 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 29 Aug 2018 17:15:47 +0200 Subject: [PATCH 089/271] [bilibili] as you can see --- src/you_get/extractors/bilibili.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 96fc60c8..7234340a 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -144,11 +144,13 @@ class Bilibili(VideoExtractor): else: playinfo = re.search(r'__INITIAL_STATE__=(.*?);\(function\(\)', self.page) if playinfo is not None: - pages = json.loads(playinfo.group(1))['videoData']['pages'] - if len(pages) > 1: - qs = dict(parse.parse_qsl(urllib.parse.urlparse(self.url).query)) - page = pages[int(qs.get('p', 1)) - 1] - self.title = '{} #{}. {}'.format(self.title, page['page'], page['part']) + jsonPlayinfo = json.loads(playinfo.group(1)) + if 'videoData' in jsonPlayinfo: + pages = jsonPlayinfo['videoData']['pages'] + if len(pages) > 1: + qs = dict(parse.parse_qsl(urllib.parse.urlparse(self.url).query)) + page = pages[int(qs.get('p', 1)) - 1] + self.title = '{} #{}. {}'.format(self.title, page['page'], page['part']) if 'bangumi.bilibili.com/movie' in self.url: self.movie_entry(**kwargs) From bd47cb656ec5a7d6ca43ec8664f7c3908b3a8286 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 7 Sep 2018 22:21:24 +0200 Subject: [PATCH 090/271] [youtube] fix for new base.js (close #2641) --- src/you_get/extractors/youtube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 33dc470e..5482f1e4 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -37,6 +37,9 @@ class YouTube(VideoExtractor): ] def decipher(js, s): + # Examples: + # - https://www.youtube.com/yts/jsbin/player-da_DK-vflWlK-zq/base.js + # - https://www.youtube.com/yts/jsbin/player-vflvABTsY/da_DK/base.js def tr_js(code): code = re.sub(r'function', r'def', code) code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code) @@ -52,7 +55,8 @@ class YouTube(VideoExtractor): return code js = js.replace('\n', ' ') - f1 = match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') + f1 = match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \ + match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \ match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1)) f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def) From 47b2164b2e21f11de2acbb28303f13f45ddacd6a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 7 Sep 2018 22:26:51 +0200 Subject: [PATCH 091/271] [youku] +1 --- src/you_get/extractors/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index f2e67336..e86b53b9 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0515' + self.ccode = '0516' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' From 932fc7a7b5a70e466a11cef0df92aa18aca9d18f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 8 Sep 2018 00:46:15 +0200 Subject: [PATCH 092/271] [baidu] as you can see --- src/you_get/extractors/baidu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 65e62098..a8cb3d5d 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,7 +129,8 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) - vhsrc = re.findall(r'"BDE_Image"[^>]+src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) + vhsrc = re.findall(r'"BDE_Image"[^>]+src="([^"]+\.mp4)"', html) or \ + re.findall(r'vhsrc="([^"]+)"', html) if len(vhsrc) > 0: ext = 'mp4' size = url_size(vhsrc[0]) From a062be55f8eda54f4154870c0dadab4c866cb9cc Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 8 Sep 2018 00:48:10 +0200 Subject: [PATCH 093/271] version 0.4.1140 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 76969dc3..27ed8849 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1128' +__version__ = '0.4.1140' From 2d8bf0e556fc509fc4dbfc0c884e602962ec837c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 03:08:26 +0200 Subject: [PATCH 094/271] [bilibili] there's a solution you're not seeing (close #2642) --- src/you_get/extractors/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 7234340a..ceeba3ef 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -115,7 +115,7 @@ class Bilibili(VideoExtractor): self.url = 'http://www.bilibili.com/video/av{}/'.format(aid) self.ua = fake_headers['User-Agent'] - self.url = url_locations([self.url])[0] + self.url = url_locations([self.url], faker=True)[0] frag = urllib.parse.urlparse(self.url).fragment # http://www.bilibili.com/video/av3141144/index_2.html#page=3 if frag: @@ -125,7 +125,7 @@ class Bilibili(VideoExtractor): aid = re.search(r'av(\d+)', self.url).group(1) self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, page) self.referer = self.url - self.page = get_content(self.url) + self.page = get_content(self.url, headers=fake_headers) m = re.search(r'(.*?)

', self.page) or re.search(r'

', self.page) if m is not None: From 3e6387e51c14b9ef6dd437367723dbb8919812ef Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 03:14:53 +0200 Subject: [PATCH 095/271] [bilibili] duang duang --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index ceeba3ef..94eed2ea 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -381,7 +381,7 @@ def download_video_from_favlist(url, **kwargs): def bilibili_download_playlist_by_url(url, **kwargs): - url = url_locations([url])[0] + url = url_locations([url], faker=True)[0] kwargs['playlist'] = True # a bangumi here? possible? if 'live.bilibili' in url: From 89844858199bfc3b3a3317e686e5982c74949777 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 17:31:47 +0200 Subject: [PATCH 096/271] [youtube] faster than light --- src/you_get/common.py | 137 ++++++++++++++++-------------- src/you_get/extractors/youtube.py | 26 +++++- 2 files changed, 97 insertions(+), 66 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index b19d602f..d212b62b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -602,7 +602,12 @@ def url_save( # the key must be 'Referer' for the hack here if refer is not None: tmp_headers['Referer'] = refer - file_size = url_size(url, faker=faker, headers=tmp_headers) + if type(url) is list: + file_size = urls_size(url, faker=faker, headers=tmp_headers) + is_chunked, urls = True, url + else: + file_size = url_size(url, faker=faker, headers=tmp_headers) + is_chunked, urls = False, [url] continue_renameing = True while continue_renameing: @@ -655,70 +660,78 @@ def url_save( else: open_mode = 'wb' - if received < file_size: - if faker: - tmp_headers = fake_headers - ''' - if parameter headers passed in, we have it copied as tmp_header - elif headers: - headers = headers - else: - headers = {} - ''' - if received: - tmp_headers['Range'] = 'bytes=' + str(received) + '-' - if refer: - tmp_headers['Referer'] = refer + for url in urls: + received_chunk = 0 + if received < file_size: + if faker: + tmp_headers = fake_headers + ''' + if parameter headers passed in, we have it copied as tmp_header + elif headers: + headers = headers + else: + headers = {} + ''' + if received and not is_chunked: # only request a range when not chunked + tmp_headers['Range'] = 'bytes=' + str(received) + '-' + if refer: + tmp_headers['Referer'] = refer - if timeout: - response = urlopen_with_retry( - request.Request(url, headers=tmp_headers), timeout=timeout - ) - else: - response = urlopen_with_retry( - request.Request(url, headers=tmp_headers) - ) - try: - range_start = int( - response.headers[ - 'content-range' - ][6:].split('/')[0].split('-')[0] - ) - end_length = int( - response.headers['content-range'][6:].split('/')[1] - ) - range_length = end_length - range_start - except: - content_length = response.headers['content-length'] - range_length = int(content_length) if content_length is not None \ - else float('inf') + if timeout: + response = urlopen_with_retry( + request.Request(url, headers=tmp_headers), timeout=timeout + ) + else: + response = urlopen_with_retry( + request.Request(url, headers=tmp_headers) + ) + try: + range_start = int( + response.headers[ + 'content-range' + ][6:].split('/')[0].split('-')[0] + ) + end_length = int( + response.headers['content-range'][6:].split('/')[1] + ) + range_length = end_length - range_start + except: + content_length = response.headers['content-length'] + range_length = int(content_length) if content_length is not None \ + else float('inf') - if file_size != received + range_length: - received = 0 - if bar: - bar.received = 0 - open_mode = 'wb' - - with open(temp_filepath, open_mode) as output: - while True: - buffer = None - try: - buffer = response.read(1024 * 256) - except socket.timeout: - pass - if not buffer: - if received == file_size: # Download finished - break - # Unexpected termination. Retry request - tmp_headers['Range'] = 'bytes=' + str(received) + '-' - response = urlopen_with_retry( - request.Request(url, headers=tmp_headers) - ) - continue - output.write(buffer) - received += len(buffer) + if is_chunked: # always append if chunked + open_mode = 'ab' + elif file_size != received + range_length: # is it ever necessary? + received = 0 if bar: - bar.update_received(len(buffer)) + bar.received = 0 + open_mode = 'wb' + + with open(temp_filepath, open_mode) as output: + while True: + buffer = None + try: + buffer = response.read(1024 * 256) + except socket.timeout: + pass + if not buffer: + if is_chunked and received_chunk == range_length: + break + elif not is_chunked and received == file_size: # Download finished + break + # Unexpected termination. Retry request + if not is_chunked: # when + tmp_headers['Range'] = 'bytes=' + str(received) + '-' + response = urlopen_with_retry( + request.Request(url, headers=tmp_headers) + ) + continue + output.write(buffer) + received += len(buffer) + received_chunk += len(buffer) + if bar: + bar.update_received(len(buffer)) assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % ( received, os.path.getsize(temp_filepath), temp_filepath diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 5482f1e4..19864590 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -81,6 +81,16 @@ class YouTube(VideoExtractor): exec(code, globals(), locals()) return locals()['sig'] + def chunk_by_range(url, size): + urls = [] + chunk_size = 10485760 + start, end = 0, chunk_size - 1 + urls.append('%s&range=%s-%s' % (url, start, end)) + while end + 1 < size: # processed size < expected size + start, end = end + 1, end + chunk_size + urls.append('%s&range=%s-%s' % (url, start, end)) + return urls + def get_url_from_vid(vid): return 'https://youtu.be/{}'.format(vid) @@ -290,13 +300,15 @@ class YouTube(VideoExtractor): if not dash_size: try: dash_size = url_size(dash_url) except: continue + dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) + dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size)) self.dash_streams[itag] = { 'quality': '%sx%s' % (w, h), 'itag': itag, 'type': mimeType, 'mime': mimeType, 'container': 'mp4', - 'src': [dash_url, dash_mp4_a_url], + 'src': [dash_urls, dash_mp4_a_urls], 'size': int(dash_size) + int(dash_mp4_a_size) } elif mimeType == 'video/webm': @@ -310,13 +322,15 @@ class YouTube(VideoExtractor): if not dash_size: try: dash_size = url_size(dash_url) except: continue + dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) + dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size)) self.dash_streams[itag] = { 'quality': '%sx%s' % (w, h), 'itag': itag, 'type': mimeType, 'mime': mimeType, 'container': 'webm', - 'src': [dash_url, dash_webm_a_url], + 'src': [dash_urls, dash_webm_a_urls], 'size': int(dash_size) + int(dash_webm_a_size) } except: @@ -353,13 +367,15 @@ class YouTube(VideoExtractor): dash_url += '&signature={}'.format(sig) dash_size = stream['clen'] itag = stream['itag'] + dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) + dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size)) self.dash_streams[itag] = { 'quality': stream['size'], 'itag': itag, 'type': mimeType, 'mime': mimeType, 'container': 'mp4', - 'src': [dash_url, dash_mp4_a_url], + 'src': [dash_urls, dash_mp4_a_urls], 'size': int(dash_size) + int(dash_mp4_a_size) } elif stream['type'].startswith('video/webm'): @@ -378,13 +394,15 @@ class YouTube(VideoExtractor): except UnboundLocalError as e: audio_url = dash_mp4_a_url audio_size = int(dash_mp4_a_size) + dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) + audio_urls = self.__class__.chunk_by_range(audio_url, int(audio_size)) self.dash_streams[itag] = { 'quality': stream['size'], 'itag': itag, 'type': mimeType, 'mime': mimeType, 'container': 'webm', - 'src': [dash_url, audio_url], + 'src': [dash_urls, audio_urls], 'size': int(dash_size) + int(audio_size) } From f8c39fbe4cbe83d8c1f316d3c221808dbfc22931 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 23:18:39 +0200 Subject: [PATCH 097/271] [common] post_content: allow post_data_raw --- src/you_get/common.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index d212b62b..5ce52990 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -439,7 +439,7 @@ def get_content(url, headers={}, decoded=True): return data -def post_content(url, headers={}, post_data={}, decoded=True): +def post_content(url, headers={}, post_data={}, decoded=True, **kwargs): """Post the content of a URL via sending a HTTP POST request. Args: @@ -457,7 +457,10 @@ def post_content(url, headers={}, post_data={}, decoded=True): if cookies: cookies.add_cookie_header(req) req.headers.update(req.unredirected_hdrs) - post_data_enc = bytes(parse.urlencode(post_data), 'utf-8') + if kwargs.get('post_data_raw'): + post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8') + else: + post_data_enc = bytes(parse.urlencode(post_data), 'utf-8') response = urlopen_with_retry(req, data=post_data_enc) data = response.read() From f3cb2512a32f5fd14e91f0cded96cb5677a1b7fa Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 23:23:50 +0200 Subject: [PATCH 098/271] [tumblr] take my consent --- src/you_get/extractors/tumblr.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index fe4973be..f01c3352 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -13,7 +13,29 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): universal_download(url, output_dir, merge=merge, info_only=info_only) return - html = parse.unquote(get_html(url)).replace('\/', '/') + import ssl + ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) + cookie_handler = request.HTTPCookieProcessor() + opener = request.build_opener(ssl_context, cookie_handler) + request.install_opener(opener) + + page = get_html(url) + form_key = match1(page, r'id="tumblr_form_key" content="([^"]+)"') + if form_key is not None: + # bypass GDPR consent page + referer = 'https://www.tumblr.com/privacy/consent?redirect=%s' % parse.quote_plus(url) + post_content('https://www.tumblr.com/svc/privacy/consent', + headers={ + 'Content-Type': 'application/json', + 'User-Agent': fake_headers['User-Agent'], + 'Referer': referer, + 'X-tumblr-form-key': form_key, + 'X-Requested-With': 'XMLHttpRequest' + }, + post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url) + page = get_html(url) + + html = parse.unquote(page).replace('\/', '/') feed = r1(r'', html) if feed in ['photo', 'photoset', 'entry'] or feed is None: From cc69f0945aaaff6535af020d84effa7d3c89ffab Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 23:44:51 +0200 Subject: [PATCH 099/271] [universal] let Pinterest go suck a lemon --- src/you_get/extractors/universal.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 57b9b2d1..43272cb8 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -106,6 +106,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg title = '%s' % i i += 1 + if r1(r'(https://pinterest.com/pin/)', url): + continue + candies.append({'url': url, 'title': title}) From 5dfee49688645497061841d981c50fe577e4ba32 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 23:49:39 +0200 Subject: [PATCH 100/271] [common] post_content: make log right --- src/you_get/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 5ce52990..f40b0220 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -450,8 +450,10 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs): Returns: The content as a string. """ - - logging.debug('post_content: %s \n post_data: %s' % (url, post_data)) + if kwargs.get('post_data_raw'): + logging.debug('post_content: %s\npost_data_raw: %s' % (url, kwargs['post_data_raw'])) + else: + logging.debug('post_content: %s\npost_data: %s' % (url, post_data)) req = request.Request(url, headers=headers) if cookies: From 76e831d443e9ca8e4344ad0ab5130ffc25eb9a73 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 11 Sep 2018 23:51:43 +0200 Subject: [PATCH 101/271] version 0.4.1148 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 27ed8849..4f5b1645 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1140' +__version__ = '0.4.1148' From fcdfce68d29d2f56b552fb3883f41fea7b7bf9de Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 16 Sep 2018 14:18:31 +0200 Subject: [PATCH 102/271] [tumblr] squanch this --- src/you_get/extractors/tumblr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index f01c3352..bc37fa43 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -33,7 +33,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 'X-Requested-With': 'XMLHttpRequest' }, post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url) - page = get_html(url) + page = get_html(url, faker=True) html = parse.unquote(page).replace('\/', '/') feed = r1(r'', html) @@ -43,9 +43,9 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): page_title = r1(r'([^<\n]*)', html) - urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.jpg)', html) +\ - re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.png)', html) +\ - re.findall(r'(https?://[^;"&]+/tumblr_[^";]+_\d+\.gif)', html) + urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.jpg)', html) +\ + re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.png)', html) +\ + re.findall(r'(https?://[^;"&]+/tumblr_[^";&]+_\d+\.gif)', html) tuggles = {} for url in urls: From caabb083f7c78f9170347ed0d4f60330c26b6da9 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 16 Sep 2018 14:34:48 +0200 Subject: [PATCH 103/271] [tumblr] squanch that --- src/you_get/extractors/tumblr.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index bc37fa43..d63aee72 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -121,11 +121,15 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): r1(r'', html) or r1(r'([^<\n]*)', html) or url.split("/")[4]).replace('\n', '') - type, ext, size = url_info(real_url) + # this is better + vcode = r1(r'tumblr_(\w+)', real_url) + real_url = 'https://vt.media.tumblr.com/tumblr_%s.mp4' % vcode + + type, ext, size = url_info(real_url, faker=True) print_info(site_info, title, type, size) if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) + download_urls([real_url], title, ext, size, output_dir, merge=merge) site_info = "Tumblr.com" download = tumblr_download From 1ea4abdb779ce2073accad10fd795a4add418142 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 17 Sep 2018 13:52:04 +0200 Subject: [PATCH 104/271] [universal] lalalala --- src/you_get/extractors/universal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 43272cb8..a1ab1536 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -67,9 +67,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg urls = [] for i in media_exts: - urls += re.findall(r'(https?://[^ ;"\'\\]+' + i + r'[^ ;"\'\\]*)', page) + urls += re.findall(r'(https?://[^ ;&"\'\\]+' + i + r'[^ ;&"\'\\]*)', page) - p_urls = re.findall(r'(https?%3A%2F%2F[^;&]+' + i + r'[^;&]*)', page) + p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page) urls += [parse.unquote(url) for url in p_urls] q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\']+' + i + r'[^ ;"\']*)', page) From fc8df5eb24b0856d4b0a7c99bebb58b901cb40f1 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 17 Sep 2018 13:52:44 +0200 Subject: [PATCH 105/271] [naver] call universal_download if video extraction fails --- src/you_get/extractors/naver.py | 42 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/you_get/extractors/naver.py b/src/you_get/extractors/naver.py index d79e5245..b9eef8d7 100644 --- a/src/you_get/extractors/naver.py +++ b/src/you_get/extractors/naver.py @@ -7,31 +7,35 @@ import re from ..util import log from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size +from .universal import * __all__ = ['naver_download_by_url'] -def naver_download_by_url(url, info_only=False, **kwargs): +def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs): ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' page = get_content(url) - og_video_url = re.search(r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page).group(1) - params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query) - vid = params_dict['vid'][0] - key = params_dict['outKey'][0] - meta_str = get_content(ep.format(vid, key)) - meta_json = json.loads(meta_str) - if 'errorCode' in meta_json: - log.wtf(meta_json['errorCode']) - title = meta_json['meta']['subject'] - videos = meta_json['videos']['list'] - video_list = sorted(videos, key=lambda video: video['encodingOption']['width']) - video_url = video_list[-1]['source'] - # size = video_list[-1]['size'] - # result wrong size - size = url_size(video_url) - print_info(site_info, title, 'mp4', size) - if not info_only: - download_urls([video_url], title, 'mp4', size, **kwargs) + try: + og_video_url = re.search(r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page).group(1) + params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query) + vid = params_dict['vid'][0] + key = params_dict['outKey'][0] + meta_str = get_content(ep.format(vid, key)) + meta_json = json.loads(meta_str) + if 'errorCode' in meta_json: + log.wtf(meta_json['errorCode']) + title = meta_json['meta']['subject'] + videos = meta_json['videos']['list'] + video_list = sorted(videos, key=lambda video: video['encodingOption']['width']) + video_url = video_list[-1]['source'] + # size = video_list[-1]['size'] + # result wrong size + size = url_size(video_url) + print_info(site_info, title, 'mp4', size) + if not info_only: + download_urls([video_url], title, 'mp4', size, **kwargs) + except: + universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) site_info = "naver.com" download = naver_download_by_url From 286a7788d2fc57b4888185eb795ab8ab7e6d5ca6 Mon Sep 17 00:00:00 2001 From: Mateusz Piotrowski <0mp@FreeBSD.org> Date: Wed, 19 Sep 2018 09:38:26 +0200 Subject: [PATCH 106/271] Add installation instructions for FreeBSD --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index f6f8efdc..14500577 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,14 @@ You can install `you-get` easily via: $ brew install you-get ``` +### Option 8: pkg (FreeBSD only) + +You can install `you-get` easily via: + +``` +# pkg install you-get +``` + ### Shell completion Completion definitions for Bash, Fish and Zsh can be found in [`contrib/completion`](https://github.com/soimort/you-get/tree/develop/contrib/completion). Please consult your shell's manual for how to take advantage of them. From 4b7753f2d43701203d82d8826966ef34f2bd29e8 Mon Sep 17 00:00:00 2001 From: mq-liu <mingquan_liu@163.com> Date: Fri, 21 Sep 2018 15:02:22 +0800 Subject: [PATCH 107/271] update sohu.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 原来的下载, 下载特别慢, 还会出错 --- src/you_get/extractors/sohu.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 58eb1ac7..a8c81be5 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -15,9 +15,9 @@ Changelog: new api ''' -def real_url(host,vid,tvid,new,clipURL,ck): - url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())+'&rb=1' - return json.loads(get_html(url))['url'] +def real_url(fileName,key,ch): + url = "https://data.vod.itc.cn/ip?new=" + fileName + "&num=1&key=" + key + "&ch=" + ch + "&pt=1&pg=2&prod=h5n" + return json.loads(get_html(url))['servers'][0]['url'] def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None, **kwargs): if re.match(r'http://share.vrs.sohu.com', url): @@ -51,9 +51,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): - clipURL = urlparse(clip).path - urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) + for fileName,key,ch, in zip(data['su'], data['ck'], data['ch']): + urls.append(real_url(fileName,key,ch)) # assert data['clipsURL'][0].endswith('.mp4') else: @@ -66,9 +65,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(map(int,data['clipsBytes'])) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): - clipURL = urlparse(clip).path - urls.append(real_url(host,vid,tvid,new,clipURL,ck)) + for fileName,key,ch, in zip(data['su'], data['ck'], data['ch']): + urls.append(real_url(fileName,key,ch)) print_info(site_info, title, 'mp4', size) if not info_only: From 251b80962a29aabc314580b8d30887d88fff007c Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 14 Oct 2018 15:34:49 +0200 Subject: [PATCH 108/271] util.fs: \t not allowed in FAT (close #2646) --- src/you_get/util/fs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index b6b7069a..c04a10a7 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -13,6 +13,7 @@ def legitimize(text, os=detect_os()): ord('|'): '-', }) + # FIXME: do some filesystem detection if os == 'windows' or os == 'cygwin' or os == 'wsl': # Windows (non-POSIX namespace) text = text.translate({ @@ -28,6 +29,7 @@ def legitimize(text, os=detect_os()): ord('>'): '-', ord('['): '(', ord(']'): ')', + ord('\t'): ' ', }) else: # *nix From f94c8d530df77254e3597dbd16a4ba40b2048e56 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 17 Oct 2018 22:26:55 +0200 Subject: [PATCH 109/271] util.log: add yes_or_no() --- src/you_get/util/log.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index a2c77ab5..67b26b78 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -96,3 +96,9 @@ def wtf(message, exit_code=1): print_log(message, RED, BOLD) if exit_code is not None: sys.exit(exit_code) + +def yes_or_no(message): + ans = str(input('%s (y/N) ' % message)).lower().strip() + if ans == 'y': + return True + return False From 5026436e8a573a3a7656184738dfe6a537936291 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 17 Oct 2018 22:28:21 +0200 Subject: [PATCH 110/271] common: add proper warning and confirming before overwriting things --- src/you_get/common.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index f40b0220..88e7d8d3 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -622,7 +622,7 @@ def url_save( if not is_part: if bar: bar.done() - print( + log.w( 'Skipping {}: file already exists'.format( tr(os.path.basename(filepath)) ) @@ -648,7 +648,10 @@ def url_save( print('Changing name to %s' % tr(os.path.basename(filepath)), '...') continue_renameing = True continue - print('Overwriting %s' % tr(os.path.basename(filepath)), '...') + if log.yes_or_no('File with this name already exists. Overwrite?'): + log.w('Overwriting %s ...' % tr(os.path.basename(filepath))) + else: + return elif not os.path.exists(os.path.dirname(filepath)): os.mkdir(os.path.dirname(filepath)) @@ -925,7 +928,7 @@ def download_urls( if total_size: if not force and os.path.exists(output_filepath) and not auto_rename\ and os.path.getsize(output_filepath) >= total_size * 0.9: - print('Skipping %s: file already exists' % output_filepath) + log.w('Skipping %s: file already exists' % output_filepath) print() return bar = SimpleProgressBar(total_size, len(urls)) From fabb35a5b982d918e94abe89cd5a63a501b518cb Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 20 Oct 2018 16:22:14 +0200 Subject: [PATCH 111/271] [sohu] do not zip ch --- src/you_get/extractors/sohu.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index a8c81be5..607bf44c 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -15,7 +15,7 @@ Changelog: new api ''' -def real_url(fileName,key,ch): +def real_url(fileName, key, ch): url = "https://data.vod.itc.cn/ip?new=" + fileName + "&num=1&key=" + key + "&ch=" + ch + "&pt=1&pg=2&prod=h5n" return json.loads(get_html(url))['servers'][0]['url'] @@ -51,8 +51,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for fileName,key,ch, in zip(data['su'], data['ck'], data['ch']): - urls.append(real_url(fileName,key,ch)) + for fileName, key in zip(data['su'], data['ck']): + urls.append(real_url(fileName, key, data['ch'])) # assert data['clipsURL'][0].endswith('.mp4') else: @@ -65,8 +65,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(map(int,data['clipsBytes'])) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for fileName,key,ch, in zip(data['su'], data['ck'], data['ch']): - urls.append(real_url(fileName,key,ch)) + for fileName, key in zip(data['su'], data['ck']): + urls.append(real_url(fileName, key, data['ch'])) print_info(site_info, title, 'mp4', size) if not info_only: From 50b66f3151dc63ffb7b7e216056906afff150358 Mon Sep 17 00:00:00 2001 From: kxy000 <kxy000@qq.com> Date: Mon, 22 Oct 2018 23:54:10 +0800 Subject: [PATCH 112/271] Update pptv.py add user agent --- src/you_get/extractors/pptv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/pptv.py b/src/you_get/extractors/pptv.py index 17503c1c..8d95a5a1 100644 --- a/src/you_get/extractors/pptv.py +++ b/src/you_get/extractors/pptv.py @@ -192,14 +192,14 @@ class PPTV(VideoExtractor): if self.url and not self.vid: if not re.match(r'http://v.pptv.com/show/(\w+)\.html', self.url): raise('Unknown url pattern') - page_content = get_content(self.url) + page_content = get_content(self.url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}) self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)') if not self.vid: raise('Cannot find id') api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid) api_url += '?appplt=flp&appid=pptv.flashplayer.vod&appver=3.4.2.28&type=&version=4' - dom = parseString(get_content(api_url)) + dom = parseString(get_content(api_url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"})) self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom) xml_streams = merge_meta(m_items, m_streams, m_segs) for stream_id in xml_streams: From dc1581869fe819e127bd17da83fccf5fc08d1339 Mon Sep 17 00:00:00 2001 From: beyond <yangbing@gozap.com> Date: Thu, 25 Oct 2018 11:12:36 +0800 Subject: [PATCH 113/271] Update miapai api --- src/you_get/extractors/yixia.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py index ff45730d..d3d1ef35 100644 --- a/src/you_get/extractors/yixia.py +++ b/src/you_get/extractors/yixia.py @@ -7,6 +7,24 @@ from urllib.parse import urlparse from json import loads import re +#---------------------------------------------------------------------- +def miaopai_download_by_smid(smid, output_dir = '.', merge = True, info_only = False): + """""" + api_endpoint = 'https://n.miaopai.com/api/aj_media/info.json?smid={smid}'.format(smid = smid) + + html = get_content(api_endpoint) + + api_content = loads(html) + + video_url = api_content['data']['meta_data'][0]['play_urls']['l'] + title = api_content['data']['description'] + + type, ext, size = url_info(video_url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([video_url], title, ext, size, output_dir, merge=merge) + #---------------------------------------------------------------------- def yixia_miaopai_download_by_scid(scid, output_dir = '.', merge = True, info_only = False): """""" @@ -47,7 +65,11 @@ def yixia_xiaokaxiu_download_by_scid(scid, output_dir = '.', merge = True, info_ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): """wrapper""" hostname = urlparse(url).hostname - if 'miaopai.com' in hostname: #Miaopai + if 'n.miaopai.com' == hostname: + smid = match1(url, r'n\.miaopai\.com/media/([^.]+)') + miaopai_download_by_smid(smid, output_dir, merge, info_only) + return + elif 'miaopai.com' in hostname: #Miaopai yixia_download_by_scid = yixia_miaopai_download_by_scid site_info = "Yixia Miaopai" From 035294e573b9397bbe2278e1666c54268562e7e0 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Fri, 26 Oct 2018 22:36:24 +0200 Subject: [PATCH 114/271] [bilibili] the production of too many useful things results in --- src/you_get/extractors/bilibili.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 94eed2ea..ed9663c0 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -192,7 +192,12 @@ class Bilibili(VideoExtractor): index_id = int(re.search(r'index_(\d+)', self.url).group(1)) cid = page_list[index_id-1]['cid'] # change cid match rule except: - cid = re.search(r'"cid":(\d+)', self.page).group(1) + page = re.search(r'p=(\d+)', self.url) + if page is None: + p = 1 + else: + p = int(page.group(1)) + cid = re.search(r'"cid":(\d+),"page":%s' % p, self.page).group(1) if cid is not None: self.download_by_vid(cid, re.search('bangumi', self.url) is not None, **kwargs) else: From 389b55b12229ebe114120b6ccd1490446b75fdb4 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Fri, 26 Oct 2018 22:48:04 +0200 Subject: [PATCH 115/271] .travis.yml: skip more of flake8 --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9df327b0..7e772c8c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,10 @@ python: - "3.6" - "nightly" - "pypy3" -before_install: pip install flake8 +before_install: + - if [[ $TRAVIS_PYTHON_VERSION != '3.2'* && $TRAVIS_PYTHON_VERSION != '3.3'* ]]; then pip install flake8; fi before_script: - - if [[ $TRAVIS_PYTHON_VERSION != '3.2'* ]]; then flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics; fi + - if [[ $TRAVIS_PYTHON_VERSION != '3.2'* && $TRAVIS_PYTHON_VERSION != '3.3'* ]]; then flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics; fi script: make test sudo: false notifications: From 6d6c219a282c1887483c2a167735f802b8686467 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 28 Oct 2018 13:33:28 +0100 Subject: [PATCH 116/271] version 0.4.1164 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 4f5b1645..e1a5349d 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1148' +__version__ = '0.4.1164' From 1f70be6aa937a6533b3e990334d4138283949deb Mon Sep 17 00:00:00 2001 From: lc4t <lc4t0.0@gmail.com> Date: Thu, 1 Nov 2018 11:57:21 +0800 Subject: [PATCH 117/271] use new bilibili live api, fix None Content-Type --- src/you_get/common.py | 2 +- src/you_get/extractors/bilibili.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 88e7d8d3..3d04e8a1 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -429,7 +429,7 @@ def get_content(url, headers={}, decoded=True): # Decode the response body if decoded: charset = match1( - response.getheader('Content-Type'), r'charset=([\w-]+)' + response.getheader('Content-Type', ''), r'charset=([\w-]+)' ) if charset is not None: data = data.decode(charset) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index ed9663c0..71cc7fc2 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -22,7 +22,7 @@ from .youku import youku_download_by_vid class Bilibili(VideoExtractor): name = 'Bilibili' - live_api = 'http://live.bilibili.com/api/playurl?cid={}&otype=json' + live_api = 'https://api.live.bilibili.com/room/v1/Room/playUrl?cid={}&quality=0&platform=web' api_url = 'http://interface.bilibili.com/v2/playurl?' bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?' live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}' @@ -233,7 +233,7 @@ class Bilibili(VideoExtractor): api_url = self.live_api.format(self.room_id) json_data = json.loads(get_content(api_url)) - urls = [json_data['durl'][0]['url']] + urls = [json_data['data']['durl'][0]['url']] self.streams['live'] = {} self.streams['live']['src'] = urls From aa221f137817d4b30611dea8860b6c625cc3f5ee Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 7 Nov 2018 16:49:18 +0100 Subject: [PATCH 118/271] [youtube] whatever this (0,window.encodeURIComponent) thing is (fix #2652) --- src/you_get/extractors/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 19864590..b1a680b9 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -40,6 +40,7 @@ class YouTube(VideoExtractor): # Examples: # - https://www.youtube.com/yts/jsbin/player-da_DK-vflWlK-zq/base.js # - https://www.youtube.com/yts/jsbin/player-vflvABTsY/da_DK/base.js + # - https://www.youtube.com/yts/jsbin/player-vfls4aurX/da_DK/base.js def tr_js(code): code = re.sub(r'function', r'def', code) code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code) @@ -55,7 +56,8 @@ class YouTube(VideoExtractor): return code js = js.replace('\n', ' ') - f1 = match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \ + f1 = match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \ + match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \ match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \ match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1)) From 046dcea5e805ff18473a6ecdd722ddfa437855fd Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 7 Nov 2018 16:59:58 +0100 Subject: [PATCH 119/271] version 0.4.1167 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index e1a5349d..883b7dca 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1164' +__version__ = '0.4.1167' From 7d9ce6b8d01145a4d8215916d2acab29d2d08565 Mon Sep 17 00:00:00 2001 From: Yingdong Yang <storm-yyd@outlook.com> Date: Fri, 9 Nov 2018 13:36:03 +0800 Subject: [PATCH 120/271] fix iwara --- src/you_get/extractors/iwara.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/you_get/extractors/iwara.py b/src/you_get/extractors/iwara.py index 50d14fb8..a30159d7 100644 --- a/src/you_get/extractors/iwara.py +++ b/src/you_get/extractors/iwara.py @@ -17,20 +17,20 @@ headers = { def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs): global headers - video_hash=match1(url, r'http://\w+.iwara.tv/videos/(\w+)') - video_url=match1(url, r'(http://\w+.iwara.tv)/videos/\w+') - html = get_content(url,headers=headers) + video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)') + video_url = match1(url, r'(https?://\w+.iwara.tv)/videos/\w+') + html = get_content(url, headers=headers) title = r1(r'<title>(.*)', html) - api_url=video_url+'/api/video/'+video_hash - content=get_content(api_url,headers=headers) - data=json.loads(content) - type,ext,size=url_info(data[0]['uri'], headers=headers) - down_urls=data[0]['uri'] - print_info(down_urls,title+data[0]['resolution'],type,size) + api_url = video_url + '/api/video/' + video_hash + content = get_content(api_url, headers=headers) + data = json.loads(content) + down_urls = 'https:' + data[0]['uri'] + type, ext, size = url_info(down_urls, headers=headers) + print_info(site_info, title+data[0]['resolution'], type, size) if not info_only: - download_urls([down_urls], title, ext, size, output_dir, merge = merge,headers=headers) + download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers) -site_info = "iwara" +site_info = "Iwara" download = iwara_download download_playlist = playlist_not_supported('iwara') From 64e15159abc326dbe3afb16883e652942361cb5b Mon Sep 17 00:00:00 2001 From: Vcinly Date: Sun, 11 Nov 2018 13:03:29 +0800 Subject: [PATCH 121/271] support download bilibili uploader all videos --- src/you_get/extractors/bilibili.py | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 71cc7fc2..053b4d19 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -384,6 +384,38 @@ def download_video_from_favlist(url, **kwargs): else: log.wtf("Fail to parse the fav title" + url, "") +def download_video_from_totallist(url, page, **kwargs): + # the url has format: https://space.bilibili.com/64169458/#/video + m = re.search(r'space\.bilibili\.com/(\d+)/.*?video', url) + mid = "" + if m is not None: + mid = m.group(1) + jsonresult = json.loads(get_content("https://space.bilibili.com/ajax/member/getSubmitVideos?mid={}&pagesize=100&tid=0&page={}&keyword=&order=pubdate&jsonp=jsonp".format(mid, page))) + if jsonresult['status']: + videos = jsonresult['data']['vlist'] + videocount = len(videos) + for i in range(videocount): + videoid = videos[i]["aid"] + videotitle = videos[i]["title"] + videourl = "https://www.bilibili.com/video/av{}".format(videoid) + print("Start downloading ", videotitle, " video ", videotitle) + kwargs["output_dir"] = kwargs["output_dir"] + '/' + str(videoid) + download_cover(videos[i]['pic'], videotitle, **kwargs) + Bilibili().download_by_url(videourl, subtitle=videotitle, **kwargs) + if page <= jsonresult['pages']: + page += 1 + download_video_from_totallist(url, page, **kwargs) + else: + log.wtf("Fail to get the files of page " + jsonresult) + sys.exit(2) + + else: + log.wtf("Fail to parse the fav title" + url, "") + +def download_cover(url, title, **kwargs): + if re.match(r'https?://', url) is None: + url = 'https:' + url + download_urls([url], title, "jpg", 0, kwargs["output_dir"]) def bilibili_download_playlist_by_url(url, **kwargs): url = url_locations([url], faker=True)[0] @@ -403,6 +435,8 @@ def bilibili_download_playlist_by_url(url, **kwargs): elif 'favlist' in url: # this a fav list folder download_video_from_favlist(url, **kwargs) + elif 'video' in url: + download_video_from_totallist(url, 1, **kwargs) else: aid = re.search(r'av(\d+)', url).group(1) page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid))) From 470b74f3aff77168e0d11c4f7caa470bb1cac238 Mon Sep 17 00:00:00 2001 From: Vcinly Date: Sat, 17 Nov 2018 22:44:25 +0800 Subject: [PATCH 122/271] [bilibili] fixed space videos url detect --- src/you_get/extractors/bilibili.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 053b4d19..10077bf8 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -399,10 +399,8 @@ def download_video_from_totallist(url, page, **kwargs): videotitle = videos[i]["title"] videourl = "https://www.bilibili.com/video/av{}".format(videoid) print("Start downloading ", videotitle, " video ", videotitle) - kwargs["output_dir"] = kwargs["output_dir"] + '/' + str(videoid) - download_cover(videos[i]['pic'], videotitle, **kwargs) Bilibili().download_by_url(videourl, subtitle=videotitle, **kwargs) - if page <= jsonresult['pages']: + if page < jsonresult['data']['pages']: page += 1 download_video_from_totallist(url, page, **kwargs) else: @@ -410,12 +408,7 @@ def download_video_from_totallist(url, page, **kwargs): sys.exit(2) else: - log.wtf("Fail to parse the fav title" + url, "") - -def download_cover(url, title, **kwargs): - if re.match(r'https?://', url) is None: - url = 'https:' + url - download_urls([url], title, "jpg", 0, kwargs["output_dir"]) + log.wtf("Fail to parse the video title" + url, "") def bilibili_download_playlist_by_url(url, **kwargs): url = url_locations([url], faker=True)[0] @@ -435,7 +428,7 @@ def bilibili_download_playlist_by_url(url, **kwargs): elif 'favlist' in url: # this a fav list folder download_video_from_favlist(url, **kwargs) - elif 'video' in url: + elif re.match(r'https?://space.bilibili.com/\d+/#/video', url): download_video_from_totallist(url, 1, **kwargs) else: aid = re.search(r'av(\d+)', url).group(1) From 9f68d3c37a5c98e75cd884332f92fd27d6246c82 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 18 Nov 2018 23:50:38 +0100 Subject: [PATCH 123/271] [bilibili] fix bangumi thing --- src/you_get/extractors/bilibili.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 10077bf8..9ae54640 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -265,22 +265,9 @@ class Bilibili(VideoExtractor): episode_id = frag else: episode_id = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page) or re.search(r'\/ep(\d+)', self.url).group(1) - # cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data=dict(episode_id=episode_id)) - # cid = json.loads(cont)['result']['cid'] - cont = get_content('http://bangumi.bilibili.com/web_api/episode/{}.json'.format(episode_id)) - ep_info = json.loads(cont)['result']['currentEpisode'] - - bangumi_data = get_bangumi_info(str(ep_info['seasonId'])) - bangumi_payment = bangumi_data.get('payment') - if bangumi_payment and bangumi_payment['price'] != '0': - log.w("It's a paid item") - # ep_ids = collect_bangumi_epids(bangumi_data) - - index_title = ep_info['indexTitle'] - long_title = ep_info['longTitle'].strip() - cid = ep_info['danmaku'] - - self.title = '{} [{} {}]'.format(self.title, index_title, long_title) + data = json.loads(re.search(r'__INITIAL_STATE__=(.+);\(function', self.page).group(1)) + cid = data['epInfo']['cid'] + # index_title = data['epInfo']['index_title'] self.download_by_vid(cid, bangumi=True, **kwargs) From 98d9580dd344b36c65e59652292b63552ddf21cc Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 19 Nov 2018 00:14:04 +0100 Subject: [PATCH 124/271] [common] fix google_search --- src/you_get/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 3d04e8a1..6a239154 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1575,9 +1575,9 @@ def google_search(url): url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords) page = get_content(url, headers=fake_headers) videos = re.findall( - r'([^<]+)<', page + r'

([^<]+)<', page ) - vdurs = re.findall(r'([^<]+)<', page) + vdurs = re.findall(r'([^<]+)<', page) durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs] print('Google Videos search:') for v in zip(videos, durs): From e14f21f323c5210ab2f04a0a861d1515c2178092 Mon Sep 17 00:00:00 2001 From: bitdust Date: Wed, 21 Nov 2018 01:56:57 +0800 Subject: [PATCH 125/271] fix bilibili title regex match '' with html attribute --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 9ae54640..079501c6 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -130,7 +130,7 @@ class Bilibili(VideoExtractor): m = re.search(r'(.*?)

', self.page) or re.search(r'

', self.page) if m is not None: self.title = m.group(1) - s = re.search(r'([^<]+)', m.group(1)) + s = re.search(r'([^<]+)', m.group(1)) if s: self.title = unescape_html(s.group(1)) if self.title is None: From 5946a545751ae8376beec54032ea92e2fc6e710d Mon Sep 17 00:00:00 2001 From: FengLi666 Date: Thu, 22 Nov 2018 13:45:00 +0800 Subject: [PATCH 126/271] fix acfun bangumi page --- src/you_get/extractors/acfun.py | 35 +++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 4b45c5e9..772132fe 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -105,27 +105,42 @@ def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=Fals pass def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url) - html = get_content(url) + assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url) - title = r1(r'data-title="([^"]+)"', html) + if re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url): + html = get_content(url) + title = r1(r'data-title="([^"]+)"', html) + if match1(url, r'_(\d+)$'): # current P + title = title + " " + r1(r'active">([^<]*)', html) + vid = r1('data-vid="(\d+)"', html) + up = r1('data-name="([^"]+)"', html) + # bangumi + elif re.match("http://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url): + html = get_content(url) + title = match1(html, r'"newTitle"\s*:\s*"([^"]+)"') + if match1(url, r'_(\d+)$'): # current P + title = title + " " + r1(r'active">([^<]*)', html) + vid = match1(html, r'videoId="(\d+)"') + up = "acfun" + else: + raise NotImplemented + + assert title and vid title = unescape_html(title) title = escape_file_path(title) - assert title - if match1(url, r'_(\d+)$'): # current P - title = title + " " + r1(r'active">([^<]*)', html) - - vid = r1('data-vid="(\d+)"', html) - up = r1('data-name="([^"]+)"', html) p_title = r1('active">([^<]+)', html) title = '%s (%s)' % (title, up) - if p_title: title = '%s - %s' % (title, p_title) + if p_title: + title = '%s - %s' % (title, p_title) + + acfun_download_by_vid(vid, title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) + site_info = "AcFun.tv" download = acfun_download download_playlist = playlist_not_supported('acfun') From ab8a3a2ccff0292f38fa8f229b4cfb8784d6bcd6 Mon Sep 17 00:00:00 2001 From: URenko <18209292+URenko@users.noreply.github.com> Date: Sun, 25 Nov 2018 20:07:52 +0800 Subject: [PATCH 127/271] fix acfun flv support --- src/you_get/extractors/acfun.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 772132fe..200a3f54 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -85,9 +85,13 @@ def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=Fals _, _, seg_size = url_info(url) size += seg_size #fallback to flvhd is not quite possible - print_info(site_info, title, 'mp4', size) + if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]): + ext = 'flv' + else: + ext = 'mp4' + print_info(site_info, title, ext, size) if not info_only: - download_urls(preferred[0], title, 'mp4', size, output_dir=output_dir, merge=merge) + download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge) else: raise NotImplementedError(sourceType) From 1f52bd01ae062c2b51511aa76cd56c939dc0e02d Mon Sep 17 00:00:00 2001 From: astronaut <519537870@qq.com> Date: Mon, 26 Nov 2018 20:57:46 +0800 Subject: [PATCH 128/271] support bilibili audio --- src/you_get/extractor.py | 2 +- src/you_get/extractors/bilibili.py | 78 +++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 4c9ccaa5..8aeed560 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -211,7 +211,7 @@ class VideoExtractor(): ext = self.dash_streams[stream_id]['container'] total_size = self.dash_streams[stream_id]['size'] - if ext == 'm3u8': + if ext == 'm3u8' or ext == 'm4a': ext = 'mp4' if not urls: diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 079501c6..24821d77 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -137,7 +137,6 @@ class Bilibili(VideoExtractor): m = re.search(r'property="og:title" content="([^"]+)"', self.page) if m is not None: self.title = m.group(1) - if 'subtitle' in kwargs: subtitle = kwargs['subtitle'] self.title = '{} {}'.format(self.title, subtitle) @@ -162,6 +161,8 @@ class Bilibili(VideoExtractor): self.live_entry(**kwargs) elif 'vc.bilibili.com' in self.url: self.vc_entry(**kwargs) + elif 'audio/au' in self.url: + self.audio_entry(**kwargs) else: self.entry(**kwargs) @@ -173,6 +174,30 @@ class Bilibili(VideoExtractor): self.title = page_list[0]['pagename'] self.download_by_vid(page_list[0]['cid'], True, bangumi_movie=True, **kwargs) + def audio_entry(self, **kwargs): + assert re.match(r'https?://www.bilibili.com/audio/au\d+', self.url) + patt = r"(\d+)" + audio_id = re.search(patt, self.url).group(1) + audio_info_url = \ + 'https://www.bilibili.com/audio/music-service-c/web/song/info?sid={}'.format(audio_id) + audio_info_response = json.loads(get_content(audio_info_url)) + if audio_info_response['msg'] != 'success': + log.wtf('fetch audio information failed!') + sys.exit(2) + self.title = audio_info_response['data']['title'] + # TODO:there is no quality option for now + audio_download_url = \ + 'https://www.bilibili.com/audio/music-service-c/web/url?sid={}&privilege=2&quality=2'.format(audio_id) + audio_download_response = json.loads(get_content(audio_download_url)) + if audio_download_response['msg'] != 'success': + log.wtf('fetch audio resource failed!') + sys.exit(2) + self.streams['mp4'] = {} + self.streams['mp4']['src'] = [audio_download_response['data']['cdns'][0]] + self.streams['mp4']['container'] = 'm4a' + self.streams['mp4']['size'] = audio_download_response['data']['size'] + + def entry(self, **kwargs): # tencent player tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page) @@ -370,6 +395,29 @@ def download_video_from_favlist(url, **kwargs): else: log.wtf("Fail to parse the fav title" + url, "") +def download_music_from_favlist(url, page, **kwargs): + m = re.search(r'https?://www.bilibili.com/audio/mycollection/(\d+)', url) + if m is not None: + sid = m.group(1) + json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-coll?" + "sid={}&pn={}&ps=100".format(sid, page))) + if json_result['msg'] == 'success': + music_list = json_result['data']['data'] + music_count = len(music_list) + for i in range(music_count): + audio_id = music_list[i]['id'] + audio_title = music_list[i]['title'] + audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id) + print("Start downloading music ", audio_title) + Bilibili().download_by_url(audio_url, **kwargs) + if page < json_result['data']['pageCount']: + page += 1 + download_music_from_favlist(url, page, **kwargs) + else: + log.wtf("Fail to get music list of page " + json_result) + sys.exit(2) + else: + log.wtf("Fail to parse the sid from " + url, "") def download_video_from_totallist(url, page, **kwargs): # the url has format: https://space.bilibili.com/64169458/#/video @@ -397,6 +445,30 @@ def download_video_from_totallist(url, page, **kwargs): else: log.wtf("Fail to parse the video title" + url, "") +def download_music_from_totallist(url, page, **kwargs): + m = re.search(r'https?://www.bilibili.com/audio/am(\d+)\?type=\d', url) + if m is not None: + sid = m.group(1) + json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-menu?" + "sid={}&pn={}&ps=100".format(sid, page))) + if json_result['msg'] == 'success': + music_list = json_result['data']['data'] + music_count = len(music_list) + for i in range(music_count): + audio_id = music_list[i]['id'] + audio_title = music_list[i]['title'] + audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id) + print("Start downloading music ",audio_title) + Bilibili().download_by_url(audio_url, **kwargs) + if page < json_result['data']['pageCount']: + page += 1 + download_music_from_totallist(url, page, **kwargs) + else: + log.wtf("Fail to get music list of page " + json_result) + sys.exit(2) + else: + log.wtf("Fail to parse the sid from " + url, "") + def bilibili_download_playlist_by_url(url, **kwargs): url = url_locations([url], faker=True)[0] kwargs['playlist'] = True @@ -417,6 +489,10 @@ def bilibili_download_playlist_by_url(url, **kwargs): download_video_from_favlist(url, **kwargs) elif re.match(r'https?://space.bilibili.com/\d+/#/video', url): download_video_from_totallist(url, 1, **kwargs) + elif re.match(r'https://www.bilibili.com/audio/mycollection/\d+', url): + download_music_from_favlist(url, 1, **kwargs) + elif re.match(r'https?://www.bilibili.com/audio/am\d+\?type=\d', url): + download_music_from_totallist(url, 1, **kwargs) else: aid = re.search(r'av(\d+)', url).group(1) page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid))) From 32a2e24785e835a790754eb58f3eaaf024db056a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 30 Nov 2018 14:12:43 +0100 Subject: [PATCH 129/271] [youku] sometimes naive --- src/you_get/extractors/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index e86b53b9..75a49c70 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0516' + self.ccode = '0590' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' From fdb021371487955318fdec7b94cb48f483c90f76 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 30 Nov 2018 17:36:59 +0100 Subject: [PATCH 130/271] [youtube] don't fail the whole playlist --- src/you_get/extractors/youtube.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index b1a680b9..bc1bc469 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -144,7 +144,10 @@ class YouTube(VideoExtractor): for video in videos: vid = parse_query_param(video, 'v') index = parse_query_param(video, 'index') - self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs) + try: + self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs) + except: + pass def prepare(self, **kwargs): assert self.url or self.vid @@ -160,7 +163,8 @@ class YouTube(VideoExtractor): ytplayer_config = None if 'status' not in video_info: - log.wtf('[Failed] Unknown status.') + log.wtf('[Failed] Unknown status.', exit_code=None) + raise elif video_info['status'] == ['ok']: if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']: self.title = parse.unquote_plus(video_info['title'][0]) @@ -192,7 +196,8 @@ class YouTube(VideoExtractor): ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1)) except: msg = re.search('class="message">([^<]+)<', video_page).group(1) - log.wtf('[Failed] "%s"' % msg.strip()) + log.wtf('[Failed] "%s"' % msg.strip(), exit_code=None) + raise if 'title' in ytplayer_config['args']: # 150 Restricted from playback on certain sites @@ -201,18 +206,22 @@ class YouTube(VideoExtractor): self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') else: - log.wtf('[Error] The uploader has not made this video available in your country.') + log.wtf('[Error] The uploader has not made this video available in your country.', exit_code=None) + raise #self.title = re.search(' Date: Fri, 30 Nov 2018 18:29:22 +0100 Subject: [PATCH 131/271] [tiktok] new site support --- src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 3 ++- src/you_get/extractors/tiktok.py | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 src/you_get/extractors/tiktok.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 6a239154..47893910 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -102,6 +102,7 @@ SITES = { 'soundcloud' : 'soundcloud', 'ted' : 'ted', 'theplatform' : 'theplatform', + 'tiktok' : 'tiktok', 'tucao' : 'tucao', 'tudou' : 'tudou', 'tumblr' : 'tumblr', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 649a911f..302433c0 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -67,6 +67,7 @@ from .sohu import * from .soundcloud import * from .suntv import * from .theplatform import * +from .tiktok import * from .tucao import * from .tudou import * from .tumblr import * @@ -88,4 +89,4 @@ from .ted import * from .khan import * from .zhanqi import * from .kuaishou import * -from .zhibo import * \ No newline at end of file +from .zhibo import * diff --git a/src/you_get/extractors/tiktok.py b/src/you_get/extractors/tiktok.py new file mode 100644 index 00000000..9718abde --- /dev/null +++ b/src/you_get/extractors/tiktok.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +__all__ = ['tiktok_download'] + +from ..common import * + +def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_html(url) + title = r1(r'(.*?)', html) + dataText = r1(r'var data = \[(.*)\] ', html) + data = json.loads(dataText) + source = 'http:' + data['video']['play_addr']['url_list'][0] + mime, ext, size = url_info(source) + + print_info(site_info, title, mime, size) + if not info_only: + download_urls([source], title, ext, size, output_dir, merge=merge) + +site_info = "TikTok.com" +download = tiktok_download +download_playlist = playlist_not_supported('tiktok') From 5fece0bd1cb1e68f19993e024bab968de2778d83 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 30 Nov 2018 18:54:05 +0100 Subject: [PATCH 132/271] [tiktok] more URL patterns --- src/you_get/extractors/tiktok.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/tiktok.py b/src/you_get/extractors/tiktok.py index 9718abde..e9ff63ab 100644 --- a/src/you_get/extractors/tiktok.py +++ b/src/you_get/extractors/tiktok.py @@ -7,7 +7,9 @@ from ..common import * def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) title = r1(r'(.*?)', html) - dataText = r1(r'var data = \[(.*)\] ', html) + video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html) + title = '%s [%s]' % (title, video_id) + dataText = r1(r'var data = \[(.*)\] ', html) or r1(r'var data = (\{.*\})', html) data = json.loads(dataText) source = 'http:' + data['video']['play_addr']['url_list'][0] mime, ext, size = url_info(source) From 0e90b9b00053e178eab032909fa8f1af16a55f90 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 30 Nov 2018 21:51:11 +0100 Subject: [PATCH 133/271] version 0.4.1181 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 883b7dca..e0a79a8e 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1167' +__version__ = '0.4.1181' From fe3eeacd543f2850f47cc9cbe8efe425129c3084 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 30 Nov 2018 22:01:26 +0100 Subject: [PATCH 134/271] claim to support Python 3.7 --- you-get.json | 1 + 1 file changed, 1 insertion(+) diff --git a/you-get.json b/you-get.json index 594742c2..56f8212a 100644 --- a/you-get.json +++ b/you-get.json @@ -25,6 +25,7 @@ "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", "Topic :: Internet", "Topic :: Internet :: WWW/HTTP", "Topic :: Multimedia", From 0930e0784e59a049ae82d29f4a44ad8471cbd622 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 30 Nov 2018 22:07:04 +0100 Subject: [PATCH 135/271] update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 14500577..856f6c80 100644 --- a/README.md +++ b/README.md @@ -424,6 +424,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 西瓜视频 | |✓| | | | 快手 | |✓|✓| | | 抖音 | |✓| | | +| TikTok | |✓| | | | 中国体育(TV) |
|✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. From 265818d39bcb1afae231ad108f4492b022ca9bbc Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 30 Nov 2018 22:08:38 +0100 Subject: [PATCH 136/271] .travis.yml: add 3.7-dev --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 7e772c8c..c11cbe34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - "3.4" - "3.5" - "3.6" + - "3.7-dev" - "nightly" - "pypy3" before_install: From 45f951b9b3267279f53a956454010decbbdef0ae Mon Sep 17 00:00:00 2001 From: FengLi666 Date: Sat, 1 Dec 2018 22:34:49 +0800 Subject: [PATCH 137/271] fix bilibili bangumi page delete out of date regex which causes error --- src/you_get/extractors/bilibili.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 24821d77..5ed7f28d 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -284,12 +284,6 @@ class Bilibili(VideoExtractor): self.streams['vc']['size'] = int(item['video_size']) def bangumi_entry(self, **kwargs): - bangumi_id = re.search(r'(\d+)', self.url).group(1) - frag = urllib.parse.urlparse(self.url).fragment - if frag: - episode_id = frag - else: - episode_id = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page) or re.search(r'\/ep(\d+)', self.url).group(1) data = json.loads(re.search(r'__INITIAL_STATE__=(.+);\(function', self.page).group(1)) cid = data['epInfo']['cid'] # index_title = data['epInfo']['index_title'] From e37291dfd86a3cb6bf780585ab51cd308bf8de26 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 5 Dec 2018 14:11:59 +0100 Subject: [PATCH 138/271] [instagram] hey ho --- src/you_get/extractors/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 65fc01f5..6537b606 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -29,7 +29,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = edge['node']['display_url'] if 'video_url' in edge['node']: image_url = edge['node']['video_url'] - image_url = image_url.split('?')[0] + image_url = image_url.split('?')[0] # (not here: '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net') ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) @@ -44,7 +44,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url'] if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']: image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'] - image_url = image_url.split('?')[0] + image_url = image_url.split('?')[0] + '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net' ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) From 926c7b283a640532044731d2b5ba6d700bb81702 Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Thu, 6 Dec 2018 00:55:08 +0800 Subject: [PATCH 139/271] fix ixigua downloading failure --- src/you_get/extractors/ixigua.py | 128 +++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 59133442..3cf07b09 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -1,14 +1,132 @@ #!/usr/bin/env python -__all__ = ['ixigua_download'] +import base64 -from .toutiao import download as toutiao_download -from .toutiao import download_playlist as toutiao_download_playlist +import binascii + +from ..common import * +import random +import ctypes +from json import loads + +__all__ = ['ixigua_download', 'ixigua_download_playlist_by_url'] + +headers = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 " + "Safari/537.36", +} + + +def int_overflow(val): + maxint = 2147483647 + if not -maxint - 1 <= val <= maxint: + val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1 + return val + + +def unsigned_right_shitf(n, i): + if n < 0: + n = ctypes.c_uint32(n).value + if i < 0: + return -int_overflow(n << abs(i)) + return int_overflow(n >> i) + + +def get_video_url_from_video_id(video_id): + """Splicing URLs according to video ID to get video details""" + # from js + data = [""] * 256 + for index, _ in enumerate(data): + t = index + for i in range(8): + t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1) + data[index] = t + + def tmp(): + rand_num = random.random() + path = "/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}".format(video_id=video_id, + random_num=str(rand_num)[2:]) + e = o = r = -1 + i, a = 0, len(path) + while i < a: + e = ord(path[i]) + i += 1 + if e < 128: + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)] + else: + if e < 2048: + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))] + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))] + else: + if 55296 <= e < 57344: + e = (1023 & e) + 64 + i += 1 + o = 1023 & t.url(i) + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))] + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))] + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))] + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))] + else: + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))] + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))] + r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))] + + return "https://ib.365yg.com{path}&s={param}".format(path=path, param=unsigned_right_shitf(r ^ -1, 0)) + + while 1: + url = tmp() + if url.split("=")[-1][0] != "-": # 参数s不能为负数 + return url def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - return toutiao_download(url.replace('ixigua', '365yg')) + # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422 + html = get_html(url, faker=True) + video_id = match1(html, r"videoId\s*:\s*'([^']+)'") + title = match1(html, r"title: '(\S+)',") + if not video_id: + log.e("video_id not found, url:{}".format(url)) + return + video_info_url = get_video_url_from_video_id(video_id) + video_info = loads(get_content(video_info_url)) + if video_info.get("code", 1) != 0: + log.e("Get video info from {} error: server return code {}".format(video_info_url, video_info.get("code", 1))) + return + if not video_info.get("data", None): + log.e("Get video info from {} error: The server returns JSON value" + " without data or data is empty".format(video_info_url)) + return + if not video_info["data"].get("video_list", None): + log.e("Get video info from {} error: The server returns JSON value" + " without data.video_list or data.video_list is empty".format(video_info_url)) + return + if not video_info["data"]["video_list"].get("video_1", None): + log.e("Get video info from {} error: The server returns JSON value" + " without data.video_list.video_1 or data.video_list.video_1 is empty".format(video_info_url)) + return + size = int(video_info["data"]["video_list"]["video_1"]["size"]) + print_info(site_info=site_info, title=title, type="mp4", size=size) # 该网站只有mp4类型文件 + if not info_only: + video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"].encode("utf-8")) + download_urls([video_url.decode("utf-8")], title, "mp4", size, output_dir, merge=merge, headers=headers, **kwargs) + + +def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs): + assert "user" in url, "Only support users to publish video list,Please provide a similar url:" \ + "https://www.ixigua.com/c/user/6907091136/" + + user_id = url.split("/")[-2] if url[-1] == "/" else url.split("/")[-1] + params = {"max_behot_time": "0", "max_repin_time": "0", "count": "20", "page_type": "0", "user_id": user_id} + while 1: + url = "https://www.ixigua.com/c/user/article/?" + "&".join(["{}={}".format(k, v) for k, v in params.items()]) + video_list = loads(get_content(url, headers=headers)) + params["max_behot_time"] = video_list["next"]["max_behot_time"] + for video in video_list["data"]: + ixigua_download("https://www.ixigua.com/i{}/".format(video["item_id"]), output_dir, merge, info_only, + **kwargs) + if video_list["next"]["max_behot_time"] == 0: + break site_info = "ixigua.com" download = ixigua_download -download_playlist = toutiao_download_playlist +download_playlist = ixigua_download_playlist_by_url From 58e806d72e57f919d3a4f9fd6a30c9691fa46903 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 5 Dec 2018 23:24:24 +0100 Subject: [PATCH 140/271] [youtube] use prefix to avoid potential namespace conflict (fix #2666) --- src/you_get/extractors/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index bc1bc469..9f2d2863 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -62,7 +62,7 @@ class YouTube(VideoExtractor): f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \ match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1)) f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def) - f1def = 'function %s%s' % (f1, f1def) + f1def = 'function main_%s%s' % (f1, f1def) # prefix to avoid potential namespace conflict code = tr_js(f1def) f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def)) for f2 in f2s: @@ -79,7 +79,7 @@ class YouTube(VideoExtractor): f1 = re.sub(r'(as|if|in|is|or)', r'_\1', f1) f1 = re.sub(r'\$', '_dollar', f1) - code = code + 'sig=%s(s)' % f1 + code = code + 'sig=main_%s(s)' % f1 # prefix to avoid potential namespace conflict exec(code, globals(), locals()) return locals()['sig'] From 7dbfece21ffbe586ae08731a0f2a86e0882c38ad Mon Sep 17 00:00:00 2001 From: lniwn Date: Thu, 6 Dec 2018 21:24:10 +0800 Subject: [PATCH 141/271] [miaopai] fix weibo.com download error --- src/you_get/extractors/miaopai.py | 52 ++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py index f37d45b0..b827024b 100644 --- a/src/you_get/extractors/miaopai.py +++ b/src/you_get/extractors/miaopai.py @@ -2,9 +2,12 @@ __all__ = ['miaopai_download'] +import string +import random from ..common import * import urllib.error import urllib.parse +from ..util import fs fake_headers_mobile = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -20,6 +23,10 @@ def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = Fa mobile_page = get_content(page_url, headers=fake_headers_mobile) url = match1(mobile_page, r'
Date: Mon, 10 Dec 2018 17:22:52 +0100 Subject: [PATCH 143/271] [instagram] let's go --- src/you_get/extractors/instagram.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 6537b606..9dd7207d 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -29,9 +29,14 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = edge['node']['display_url'] if 'video_url' in edge['node']: image_url = edge['node']['video_url'] - image_url = image_url.split('?')[0] # (not here: '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net') - ext = image_url.split('.')[-1] - size = int(get_head(image_url)['Content-Length']) + try: + image_url = image_url.split('?')[0] + ext = image_url.split('.')[-1] + size = int(get_head(image_url)['Content-Length']) + except: + image_url = image_url.split('?')[0] + '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net' + ext = image_url.split('.')[-1] + size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) if not info_only: download_urls(urls=[image_url], @@ -44,9 +49,14 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url'] if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']: image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'] - image_url = image_url.split('?')[0] + '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net' - ext = image_url.split('.')[-1] - size = int(get_head(image_url)['Content-Length']) + try: + image_url = image_url.split('?')[0] + ext = image_url.split('.')[-1] + size = int(get_head(image_url)['Content-Length']) + except: + image_url = image_url.split('?')[0] + '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net' + ext = image_url.split('.')[-1] + size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) if not info_only: download_urls(urls=[image_url], From 5d7df2eb1bd1a8bd572e9ad656696870d0f297d4 Mon Sep 17 00:00:00 2001 From: Yang Bo Date: Sun, 16 Dec 2018 07:23:59 +0000 Subject: [PATCH 144/271] Fix zhibo.tv regular expression. --- src/you_get/extractors/zhibo.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/zhibo.py b/src/you_get/extractors/zhibo.py index 4aaa293e..1d2eadea 100644 --- a/src/you_get/extractors/zhibo.py +++ b/src/you_get/extractors/zhibo.py @@ -37,11 +37,14 @@ def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwa if is_live is not "1": raise ValueError("The live stream is not online! (Errno:%s)" % is_live) - ourStreamName = r1(r"window.ourStreamName=\'([s\S'\s\.]*)\'\;[\s\S]*window.rtmpDefaultSource", html) - rtmpPollUrl = r1(r"window.rtmpPollUrl=\'([s\S'\s\.]*)\'\;[\s\S]*window.hlsDefaultSource", html) - - #real_url = 'rtmp://220.194.213.56/live.zhibo.tv/8live/' + ourStreamName - real_url = rtmpPollUrl + ourStreamName + match = re.search(r""" + ourStreamName .*? + '(.*?)' .*? + rtmpHighSource .*? + '(.*?)' .*? + '(.*?)' + """, html, re.S | re.X) + real_url = match.group(3) + match.group(1) + match.group(2) print_info(site_info, title, 'flv', float('inf')) if not info_only: From afb2db7c3c943fcc327f7bff254bece4ae5717f8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 17 Dec 2018 12:07:56 +0100 Subject: [PATCH 145/271] version 0.4.1193 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index e0a79a8e..e89eb41a 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1181' +__version__ = '0.4.1193' From fef2298b956219a2856632199e604e380da486f0 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 18 Dec 2018 15:53:56 +0100 Subject: [PATCH 146/271] [instagram] they're forming in straight line --- src/you_get/extractors/instagram.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 9dd7207d..567e0dd7 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -29,14 +29,9 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = edge['node']['display_url'] if 'video_url' in edge['node']: image_url = edge['node']['video_url'] - try: - image_url = image_url.split('?')[0] - ext = image_url.split('.')[-1] - size = int(get_head(image_url)['Content-Length']) - except: - image_url = image_url.split('?')[0] + '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net' - ext = image_url.split('.')[-1] - size = int(get_head(image_url)['Content-Length']) + ext = image_url.split('?')[0].split('.')[-1] + size = int(get_head(image_url)['Content-Length']) + print_info(site_info, title, ext, size) if not info_only: download_urls(urls=[image_url], @@ -49,14 +44,9 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url'] if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']: image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'] - try: - image_url = image_url.split('?')[0] - ext = image_url.split('.')[-1] - size = int(get_head(image_url)['Content-Length']) - except: - image_url = image_url.split('?')[0] + '?_nc_ht=instagram.fcph1-1.fna.fbcdn.net' - ext = image_url.split('.')[-1] - size = int(get_head(image_url)['Content-Length']) + ext = image_url.split('?')[0].split('.')[-1] + size = int(get_head(image_url)['Content-Length']) + print_info(site_info, title, ext, size) if not info_only: download_urls(urls=[image_url], From 98fedfb2a277a2c4e77fc85adc3865025bc696f2 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 18 Dec 2018 18:20:01 +0100 Subject: [PATCH 147/271] [miaopai] handle weibo.com/tv/v URLs --- src/you_get/extractors/miaopai.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py index b827024b..304ac176 100644 --- a/src/you_get/extractors/miaopai.py +++ b/src/you_get/extractors/miaopai.py @@ -67,7 +67,10 @@ def miaopai_download_by_wbmp(wbmp_url, fid, info_only=False, **kwargs): def miaopai_download_direct(url, info_only, **kwargs): mobile_page = get_content(url, headers=fake_headers_mobile) - title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) + try: + title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) + except: + title = re.search(r'([\'"])status_title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) title = title.replace('\n', '_') stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) ext = 'mp4' @@ -78,6 +81,9 @@ def miaopai_download_direct(url, info_only, **kwargs): # ---------------------------------------------------------------------- def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): + if match1(url, r'weibo\.com/tv/v/(\w+)'): + return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + fid = match1(url, r'\?fid=(\d{4}:\w+)') if fid is not None: miaopai_download_by_fid(fid, output_dir, merge, info_only) From b8470667568d723265ae1414b07be7c8cfdaa947 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 21 Dec 2018 15:46:11 +0100 Subject: [PATCH 148/271] [naver] fix #2671 --- src/you_get/extractors/naver.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/naver.py b/src/you_get/extractors/naver.py index b9eef8d7..add884e9 100644 --- a/src/you_get/extractors/naver.py +++ b/src/you_get/extractors/naver.py @@ -16,10 +16,15 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' page = get_content(url) try: - og_video_url = re.search(r"", page).group(1) - params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query) - vid = params_dict['vid'][0] - key = params_dict['outKey'][0] + temp = re.search(r"", page) + if temp is not None: + og_video_url = temp.group(1) + params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query) + vid = params_dict['vid'][0] + key = params_dict['outKey'][0] + else: + vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1) + key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1) meta_str = get_content(ep.format(vid, key)) meta_json = json.loads(meta_str) if 'errorCode' in meta_json: From 4e98f7bcae333ad974a940bbd8fdb540cc9e1e9e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 22 Dec 2018 23:14:30 +0100 Subject: [PATCH 149/271] [tumblr] always download the high res (1280) version of images --- src/you_get/extractors/tumblr.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index d63aee72..91b348fc 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -49,17 +49,18 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): tuggles = {} for url in urls: - filename = parse.unquote(url.split('/')[-1]) + hd_url = r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg' # FIXME: decide actual quality + filename = parse.unquote(hd_url.split('/')[-1]) title = '.'.join(filename.split('.')[:-1]) tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) quality = int(r1(r'^tumblr_.+_(\d+)$', title)) ext = filename.split('.')[-1] try: - size = int(get_head(url)['Content-Length']) + size = int(get_head(hd_url)['Content-Length']) if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality: tuggles[tumblr_id] = { 'title': title, - 'url': url, + 'url': hd_url, 'quality': quality, 'ext': ext, 'size': size, From 672cc4b1f90f355556b9fe800251382a41fb4b48 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 24 Dec 2018 18:21:28 +0100 Subject: [PATCH 150/271] [youtube] show warning for premieres --- src/you_get/extractors/youtube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 9f2d2863..df2e9e42 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -225,7 +225,11 @@ class YouTube(VideoExtractor): # YouTube Live if ytplayer_config and (ytplayer_config['args'].get('livestream') == '1' or ytplayer_config['args'].get('live_playback') == '1'): - hlsvp = ytplayer_config['args']['hlsvp'] + if 'hlsvp' in ytplayer_config['args']: + hlsvp = ytplayer_config['args']['hlsvp'] + else: + player_response= json.loads(ytplayer_config['args']['player_response']) + log.e('[Failed] %s' % player_response['playabilityStatus']['reason'], exit_code=1) if 'info_only' in kwargs and kwargs['info_only']: return From 1df62c39ffb2ba3ddf115750cd3aa5d37895a81a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 26 Dec 2018 16:48:32 +0100 Subject: [PATCH 151/271] [universal] strip query string for direct download --- src/you_get/extractors/universal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index a1ab1536..756ce4c1 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -137,7 +137,8 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg else: # direct download - filename = parse.unquote(url.split('/')[-1]) or parse.unquote(url.split('/')[-2]) + url_trunk = url.split('?')[0] # strip query string + filename = parse.unquote(url_trunk.split('/')[-1]) or parse.unquote(url_trunk.split('/')[-2]) title = '.'.join(filename.split('.')[:-1]) or filename _, ext, size = url_info(url, faker=True) print_info(site_info, title, ext, size) From 0930bb1a0c89925c3fe22b6fb9635eda0f698dab Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 26 Dec 2018 16:57:25 +0100 Subject: [PATCH 152/271] update LICENSE.txt (making the MIT license detectable) --- LICENSE.txt | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 7b25d906..5964bf20 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,15 +1,14 @@ -============================================== -This is a copy of the MIT license. -============================================== -Copyright (C) 2012-2017 Mort Yao -Copyright (C) 2012 Boyu Guo +MIT License -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: +Copyright (c) 2012-2019 Mort Yao +Copyright (c) 2012 Boyu Guo + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. From 60e035cca8c0315a63621f7e6c5ab5d6fc260e8e Mon Sep 17 00:00:00 2001 From: trymelz Date: Tue, 1 Jan 2019 18:29:57 -0600 Subject: [PATCH 153/271] check if the player exist or not --- src/you_get/common.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 78182163..ae42e46b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -273,7 +273,12 @@ def matchall(text, patterns): def launch_player(player, urls): import subprocess import shlex - subprocess.call(shlex.split(player) + list(urls)) + import shutil + exefile=shlex.split(player)[0] + if shutil.which(exefile) is not None: + subprocess.call(shlex.split(player) + list(urls)) + else: + log.wtf('[Failed] Cannot find player "%s"' % exefile) def parse_query_param(url, param): From f09d7c958af2c98dc90e81b4a2b43717f39d860e Mon Sep 17 00:00:00 2001 From: trymelz Date: Tue, 1 Jan 2019 18:47:51 -0600 Subject: [PATCH 154/271] check python version before using shutil.which function --- src/you_get/common.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index ae42e46b..6d5764ff 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -273,12 +273,15 @@ def matchall(text, patterns): def launch_player(player, urls): import subprocess import shlex - import shutil - exefile=shlex.split(player)[0] - if shutil.which(exefile) is not None: - subprocess.call(shlex.split(player) + list(urls)) + if (sys.version_info >= (3, 3)): + import shutil + exefile=shlex.split(player)[0] + if shutil.which(exefile) is not None: + subprocess.call(shlex.split(player) + list(urls)) + else: + log.wtf('[Failed] Cannot find player "%s"' % exefile) else: - log.wtf('[Failed] Cannot find player "%s"' % exefile) + subprocess.call(shlex.split(player) + list(urls)) def parse_query_param(url, param): From 95b711dc99dfc9958a6951a9dc7749ee8d58852c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Jan 2019 11:40:14 +0100 Subject: [PATCH 155/271] [tiktok] shake it --- src/you_get/extractors/tiktok.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/tiktok.py b/src/you_get/extractors/tiktok.py index e9ff63ab..9ecc662d 100644 --- a/src/you_get/extractors/tiktok.py +++ b/src/you_get/extractors/tiktok.py @@ -5,7 +5,7 @@ __all__ = ['tiktok_download'] from ..common import * def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - html = get_html(url) + html = get_html(url, faker=True) title = r1(r'(.*?)', html) video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html) title = '%s [%s]' % (title, video_id) From 912804b0fa96f32c07635c13de0315f4e03aa643 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Jan 2019 12:09:43 +0100 Subject: [PATCH 156/271] [youtube] fix #2675 --- src/you_get/extractors/youtube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index df2e9e42..9c05e787 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -41,6 +41,9 @@ class YouTube(VideoExtractor): # - https://www.youtube.com/yts/jsbin/player-da_DK-vflWlK-zq/base.js # - https://www.youtube.com/yts/jsbin/player-vflvABTsY/da_DK/base.js # - https://www.youtube.com/yts/jsbin/player-vfls4aurX/da_DK/base.js + # - https://www.youtube.com/yts/jsbin/player_ias-vfl_RGK2l/en_US/base.js + # - https://www.youtube.com/yts/jsbin/player-vflRjqq_w/da_DK/base.js + # - https://www.youtube.com/yts/jsbin/player_ias-vfl-jbnrr/da_DK/base.js def tr_js(code): code = re.sub(r'function', r'def', code) code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code) @@ -56,7 +59,8 @@ class YouTube(VideoExtractor): return code js = js.replace('\n', ' ') - f1 = match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \ + f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \ + match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \ match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \ match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \ From a151a3e250af9116d92f9d4f2288a74ac0d7d4c1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Jan 2019 12:26:57 +0100 Subject: [PATCH 157/271] version 0.4.1205 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index e89eb41a..b350385b 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1193' +__version__ = '0.4.1205' From 0aafca6de41e35b858eac56002829f56f352f328 Mon Sep 17 00:00:00 2001 From: wangqr Date: Mon, 21 Jan 2019 02:11:41 -0500 Subject: [PATCH 158/271] Use copy when merging audio and video --- src/you_get/processor/ffmpeg.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index a18188da..51cc51dd 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -59,12 +59,7 @@ def ffmpeg_concat_av(files, output, ext): params = [FFMPEG] + LOGLEVEL for file in files: if os.path.isfile(file): params.extend(['-i', file]) - params.extend(['-c:v', 'copy']) - if ext == 'mp4': - params.extend(['-c:a', 'aac']) - elif ext == 'webm': - params.extend(['-c:a', 'vorbis']) - params.extend(['-strict', 'experimental']) + params.extend(['-c', 'copy']) params.append(output) return subprocess.call(params, stdin=STDIN) From f07e909a72412bbb189269d3aa53c8721f9571b7 Mon Sep 17 00:00:00 2001 From: wangqr Date: Mon, 21 Jan 2019 09:50:47 -0500 Subject: [PATCH 159/271] Fallback to re-encoding audio when ffmpeg quits with error --- src/you_get/processor/ffmpeg.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 51cc51dd..8bdf77d2 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import logging -import os.path +import os import subprocess import sys from ..util.strings import parameterize @@ -61,7 +61,23 @@ def ffmpeg_concat_av(files, output, ext): if os.path.isfile(file): params.extend(['-i', file]) params.extend(['-c', 'copy']) params.append(output) - return subprocess.call(params, stdin=STDIN) + if subprocess.call(params, stdin=STDIN): + print('Merging without re-encode failed.\nTry again re-encoding audio... ', end="", flush=True) + try: os.remove(output) + except FileNotFoundError: pass + params = [FFMPEG] + LOGLEVEL + for file in files: + if os.path.isfile(file): params.extend(['-i', file]) + params.extend(['-c:v', 'copy']) + if ext == 'mp4': + params.extend(['-c:a', 'aac']) + params.extend(['-strict', 'experimental']) + elif ext == 'webm': + params.extend(['-c:a', 'opus']) + params.append(output) + return subprocess.call(params, stdin=STDIN) + else: + return 0 def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'): for file in files: From a449eca64e9999b359ce50b38ceafab6710e94e8 Mon Sep 17 00:00:00 2001 From: sheerluck Date: Mon, 4 Feb 2019 11:18:06 +0300 Subject: [PATCH 160/271] fix coub ffmpeg Test URL: https://coub.com/view/19cyub 1) ```params.extend(['-safe', '0'])``` fixes ```[concat @ 0x55b4c8f02980] Unsafe file name 'Test_For_The_Princess!.mp4'``` Before patch: Test_For_The_Princess!_full.mp4 is not created After patch: Test_For_The_Princess!_full.mp4 created successfully 2) ```round``` fixes ```audio_duration 64.0 + video_duration 8.04 -> 7``` Before patch: video is freezed for last 8 second After patch: perfect loop --- src/you_get/extractors/coub.py | 4 ++-- src/you_get/processor/ffmpeg.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/coub.py b/src/you_get/extractors/coub.py index 44e403d3..36a0a5d6 100644 --- a/src/you_get/extractors/coub.py +++ b/src/you_get/extractors/coub.py @@ -25,10 +25,10 @@ def coub_download(url, output_dir='.', merge=True, info_only=False, **kwargs): loop_file_path = get_loop_file_path(title, output_dir) single_file_path = audio_file_path if audio_duration > video_duration: - write_loop_file(int(audio_duration / video_duration), loop_file_path, video_file_name) + write_loop_file(round(audio_duration / video_duration), loop_file_path, video_file_name) else: single_file_path = audio_file_path - write_loop_file(int(video_duration / audio_duration), loop_file_path, audio_file_name) + write_loop_file(round(video_duration / audio_duration), loop_file_path, audio_file_name) ffmpeg.ffmpeg_concat_audio_and_video([loop_file_path, single_file_path], title + "_full", "mp4") cleanup_files([video_file_path, audio_file_path, loop_file_path]) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 8bdf77d2..02ecb012 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -267,6 +267,7 @@ def ffmpeg_concat_audio_and_video(files, output, ext): if has_ffmpeg_installed: params = [FFMPEG] + LOGLEVEL params.extend(['-f', 'concat']) + params.extend(['-safe', '0']) # https://stackoverflow.com/questions/38996925/ffmpeg-concat-unsafe-file-name for file in files: if os.path.isfile(file): params.extend(['-i', file]) From 4f191986bd6b9e9c73c437c38a1aec70cbaba57a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 10 Feb 2019 23:13:28 +0100 Subject: [PATCH 161/271] [miaopai] it is clear that --- src/you_get/extractors/miaopai.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py index 304ac176..01d043f2 100644 --- a/src/you_get/extractors/miaopai.py +++ b/src/you_get/extractors/miaopai.py @@ -84,14 +84,15 @@ def miaopai_download(url, output_dir = '.', merge = False, info_only = False, ** if match1(url, r'weibo\.com/tv/v/(\w+)'): return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + if re.match(r'^http[s]://.*\.weibo\.com/\d+/.+', url): + return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + fid = match1(url, r'\?fid=(\d{4}:\w+)') if fid is not None: miaopai_download_by_fid(fid, output_dir, merge, info_only) elif '/p/230444' in url: fid = match1(url, r'/p/230444(\w+)') miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only) - elif re.match(r'^http[s]://weibo\.com/\d+/.+', url): - miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) else: mobile_page = get_content(url, headers = fake_headers_mobile) hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page) From fd08f7b639b3cc8c8aa74b9f80a8b680a7c40f47 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 16 Feb 2019 06:09:57 +0100 Subject: [PATCH 162/271] [bilibili] fix #2680 --- src/you_get/extractors/bilibili.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 5ed7f28d..46853118 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -224,7 +224,13 @@ class Bilibili(VideoExtractor): p = int(page.group(1)) cid = re.search(r'"cid":(\d+),"page":%s' % p, self.page).group(1) if cid is not None: - self.download_by_vid(cid, re.search('bangumi', self.url) is not None, **kwargs) + #self.download_by_vid(cid, re.search('bangumi', self.url) is not None, **kwargs) + # FIXME: video qualities + playinfo_text = match1(self.page, r'__playinfo__=(.*?)<') + playinfo = json.loads(playinfo_text) + url0 = playinfo['data']['durl'][0]['url'] + _, ext, size = url_info(url0, headers={'referer': self.url, 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'}) + self.streams['flv'] = {'url': url0, 'container': ext, 'size': size, 'src': [url0]} else: # flashvars? flashvars = re.search(r'flashvars="([^"]+)"', self.page).group(1) @@ -492,7 +498,8 @@ def bilibili_download_playlist_by_url(url, **kwargs): page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid))) page_cnt = len(page_list) for no in range(1, page_cnt+1): - page_url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, no) + #page_url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, no) + page_url = 'http://www.bilibili.com/video/av{}/?p={}'.format(aid, no) subtitle = '#%s. %s'% (page_list[no-1]['page'], page_list[no-1]['pagename']) Bilibili().download_by_url(page_url, subtitle=subtitle, **kwargs) From e6534c41d87a7a37499816c26aeb49a44f965505 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 16 Feb 2019 06:17:11 +0100 Subject: [PATCH 163/271] version 0.4.1210 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index b350385b..f2b279b1 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1205' +__version__ = '0.4.1210' From 9dc034c2cc23f8a68244924bde2aac3f7db5514f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 16 Feb 2019 23:08:44 +0100 Subject: [PATCH 164/271] [common] download_urls(): fix URL printing for DASH streams --- src/you_get/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 6d5764ff..9735a30f 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -916,7 +916,10 @@ def download_urls( return if dry_run: print_user_agent(faker=faker) - print('Real URLs:\n%s' % '\n'.join(urls)) + try: + print('Real URLs:\n%s' % '\n'.join(urls)) + except: + print('Real URLs:\n%s' % '\n'.join([j for i in urls for j in i])) return if player: From a01a68fcb1cf7ed457b35075b98c9dcc0026774d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 16 Feb 2019 23:14:59 +0100 Subject: [PATCH 165/271] [bilibili] rewrite the extractor --- src/you_get/extractors/bilibili.py | 581 +++++------------------------ 1 file changed, 96 insertions(+), 485 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 46853118..6c70a574 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -1,510 +1,121 @@ #!/usr/bin/env python -__all__ = ['bilibili_download'] - -import hashlib -import re -import time -import json -import http.cookiejar -import urllib.request -import urllib.parse -from xml.dom.minidom import parseString - from ..common import * -from ..util.log import * -from ..extractor import * - -from .qq import qq_download_by_vid -from .sina import sina_download_by_vid -from .tudou import tudou_download_by_id -from .youku import youku_download_by_vid +from ..extractor import VideoExtractor class Bilibili(VideoExtractor): - name = 'Bilibili' - live_api = 'https://api.live.bilibili.com/room/v1/Room/playUrl?cid={}&quality=0&platform=web' - api_url = 'http://interface.bilibili.com/v2/playurl?' - bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?' - live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}' - live_room_info_api_url = 'https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}' + name = "Bilibili" - #SEC1 = '1c15888dc316e05a15fdd0a02ed6584f' - SEC1 = '94aba54af9065f71de72f5508f1cd42e' - SEC2 = '9b288147e5474dd2aa67085f716c560d' + # Bilibili media encoding options, in descending quality order. stream_types = [ - {'id': 'hdflv'}, - {'id': 'flv720'}, - {'id': 'flv'}, - {'id': 'hdmp4'}, - {'id': 'mp4'}, - {'id': 'live'}, - {'id': 'vc'} + {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280, + 'container': 'MP4', 'video_resolution': '1080p', 'desc': '高清 1080P60'}, + {'id': 'flv', 'quality': 80, 'audio_quality': 30280, + 'container': 'MP4', 'video_resolution': '1080p', 'desc': '高清 1080P'}, + {'id': 'flv720_p60', 'quality': 74, 'audio_quality': 30280, + 'container': 'MP4', 'video_resolution': '720p', 'desc': '高清 720P60'}, + {'id': 'flv720', 'quality': 64, 'audio_quality': 30280, + 'container': 'MP4', 'video_resolution': '720p', 'desc': '高清 720P'}, + {'id': 'flv480', 'quality': 32, 'audio_quality': 30280, + 'container': 'MP4', 'video_resolution': '480p', 'desc': '清晰 480P'}, # default + {'id': 'flv360', 'quality': 16, 'audio_quality': 30216, + 'container': 'MP4', 'video_resolution': '360p', 'desc': '流畅 360P'}, ] - fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1) @staticmethod - def bilibili_stream_type(urls): - url = urls[0] - if 'hd.flv' in url or '-80.flv' in url: - return 'hdflv', 'flv' - if '-64.flv' in url: - return 'flv720', 'flv' - if '.flv' in url: - return 'flv', 'flv' - if 'hd.mp4' in url or '-48.mp4' in url: - return 'hdmp4', 'mp4' - if '.mp4' in url: - return 'mp4', 'mp4' - raise Exception('Unknown stream type') - - def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs): - ts = str(int(time.time())) - if not bangumi: - #params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts) - params_str = 'appkey=84956560bc028eb7&cid={}&otype=xml&qn={}&quality={}&type='.format(cid, quality, quality) - chksum = hashlib.md5(bytes(params_str+self.SEC1, 'utf8')).hexdigest() - api_url = self.api_url + params_str + '&sign=' + chksum - else: - mod = 'movie' if bangumi_movie else 'bangumi' - params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format(cid, mod, quality, ts) - chksum = hashlib.md5(bytes(params_str+self.SEC2, 'utf8')).hexdigest() - api_url = self.bangumi_api_url + params_str + '&sign=' + chksum - - xml_str = get_content(api_url, headers={'referer': self.url, 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'}) - return xml_str - - def parse_bili_xml(self, xml_str): - urls_list = [] - total_size = 0 - doc = parseString(xml_str.encode('utf8')) - durls = doc.getElementsByTagName('durl') - for durl in durls: - size = durl.getElementsByTagName('size')[0] - total_size += int(size.firstChild.nodeValue) - url = durl.getElementsByTagName('url')[0] - urls_list.append(url.firstChild.nodeValue) - stream_type, container = self.bilibili_stream_type(urls_list) - if stream_type not in self.streams: - self.streams[stream_type] = {} - self.streams[stream_type]['src'] = urls_list - self.streams[stream_type]['size'] = total_size - self.streams[stream_type]['container'] = container - - def download_by_vid(self, cid, bangumi, **kwargs): - stream_id = kwargs.get('stream_id') - # guard here. if stream_id invalid, fallback as not stream_id - if stream_id and stream_id in self.fmt2qlt: - quality = stream_id - else: - quality = 'hdflv' if bangumi else 'flv' - - info_only = kwargs.get('info_only') - for qlt in [116,112,80,74,64,32,16,15]: - api_xml = self.api_req(cid, qlt, bangumi, **kwargs) - self.parse_bili_xml(api_xml) - if not info_only or stream_id: - self.danmuku = get_danmuku_xml(cid) + def bilibili_headers(referer=None, cookie=None): + # a reasonable UA + ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36' + headers = {'User-Agent': ua} + if referer is not None: + headers.update({'Referer': referer}) + if cookie is not None: + headers.update({'Cookie': cookie}) + return headers def prepare(self, **kwargs): - if socket.getdefaulttimeout() == 600: # no timeout specified - socket.setdefaulttimeout(2) # fail fast, very speedy! + self.stream_qualities = {s['quality']: s for s in self.stream_types} - # handle "watchlater" URLs - if '/watchlater/' in self.url: - aid = re.search(r'av(\d+)', self.url).group(1) - self.url = 'http://www.bilibili.com/video/av{}/'.format(aid) + html_content = get_content(self.url, headers=self.bilibili_headers()) + #self.title = match1(html_content, + # r'

(.*?)

', self.page) or re.search(r'

', self.page) - if m is not None: - self.title = m.group(1) - s = re.search(r'([^<]+)', m.group(1)) - if s: - self.title = unescape_html(s.group(1)) - if self.title is None: - m = re.search(r'property="og:title" content="([^"]+)"', self.page) - if m is not None: - self.title = m.group(1) - if 'subtitle' in kwargs: - subtitle = kwargs['subtitle'] - self.title = '{} {}'.format(self.title, subtitle) - else: - playinfo = re.search(r'__INITIAL_STATE__=(.*?);\(function\(\)', self.page) - if playinfo is not None: - jsonPlayinfo = json.loads(playinfo.group(1)) - if 'videoData' in jsonPlayinfo: - pages = jsonPlayinfo['videoData']['pages'] - if len(pages) > 1: - qs = dict(parse.parse_qsl(urllib.parse.urlparse(self.url).query)) - page = pages[int(qs.get('p', 1)) - 1] - self.title = '{} #{}. {}'.format(self.title, page['page'], page['part']) - - if 'bangumi.bilibili.com/movie' in self.url: - self.movie_entry(**kwargs) - elif 'bangumi.bilibili.com' in self.url: - self.bangumi_entry(**kwargs) - elif 'bangumi/' in self.url: - self.bangumi_entry(**kwargs) - elif 'live.bilibili.com' in self.url: - self.live_entry(**kwargs) - elif 'vc.bilibili.com' in self.url: - self.vc_entry(**kwargs) - elif 'audio/au' in self.url: - self.audio_entry(**kwargs) - else: - self.entry(**kwargs) - - def movie_entry(self, **kwargs): - patt = r"var\s*aid\s*=\s*'(\d+)'" - aid = re.search(patt, self.page).group(1) - page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid))) - # better ideas for bangumi_movie titles? - self.title = page_list[0]['pagename'] - self.download_by_vid(page_list[0]['cid'], True, bangumi_movie=True, **kwargs) - - def audio_entry(self, **kwargs): - assert re.match(r'https?://www.bilibili.com/audio/au\d+', self.url) - patt = r"(\d+)" - audio_id = re.search(patt, self.url).group(1) - audio_info_url = \ - 'https://www.bilibili.com/audio/music-service-c/web/song/info?sid={}'.format(audio_id) - audio_info_response = json.loads(get_content(audio_info_url)) - if audio_info_response['msg'] != 'success': - log.wtf('fetch audio information failed!') - sys.exit(2) - self.title = audio_info_response['data']['title'] - # TODO:there is no quality option for now - audio_download_url = \ - 'https://www.bilibili.com/audio/music-service-c/web/url?sid={}&privilege=2&quality=2'.format(audio_id) - audio_download_response = json.loads(get_content(audio_download_url)) - if audio_download_response['msg'] != 'success': - log.wtf('fetch audio resource failed!') - sys.exit(2) - self.streams['mp4'] = {} - self.streams['mp4']['src'] = [audio_download_response['data']['cdns'][0]] - self.streams['mp4']['container'] = 'm4a' - self.streams['mp4']['size'] = audio_download_response['data']['size'] - - - def entry(self, **kwargs): - # tencent player - tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page) - if tc_flashvars: - tc_flashvars = tc_flashvars.group(1) - if tc_flashvars is not None: - self.out = True - qq_download_by_vid(tc_flashvars, self.title, True, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) - return - - has_plist = re.search(r'"page":2', self.page) - if has_plist and not kwargs.get('playlist'): - log.w('This page contains a playlist. (use --playlist to download all videos.)') - - try: - page_list = json.loads(re.search(r'"pages":(\[.*?\])', self.page).group(1)) - index_id = int(re.search(r'index_(\d+)', self.url).group(1)) - cid = page_list[index_id-1]['cid'] # change cid match rule - except: - page = re.search(r'p=(\d+)', self.url) - if page is None: - p = 1 - else: - p = int(page.group(1)) - cid = re.search(r'"cid":(\d+),"page":%s' % p, self.page).group(1) - if cid is not None: - #self.download_by_vid(cid, re.search('bangumi', self.url) is not None, **kwargs) - # FIXME: video qualities - playinfo_text = match1(self.page, r'__playinfo__=(.*?)<') + # regular av + # TODO: multi-P + if re.match(r'https?://(www)?\.bilibili\.com/video/av(\d+)', self.url): + playinfo_text = match1(html_content, r'__playinfo__=(.*?)', html) - # video_guessulike = r1(r"window.xgData =([s\S'\s\.]*)\'\;[\s\S]*window.vouchData", video_html) + # video_guessulike = r1(r"window.xgData =([s\S'\s\.]*)\'\;[\s\S]*window.vouchData", video_html) video_url = r1(r"window.vurl = \'([s\S'\s\.]*)\'\;[\s\S]*window.imgurl", video_html) part_urls.append(video_url) ext = video_url.split('.')[-1] @@ -34,7 +34,7 @@ def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwa html = get_html(url) title = r1(r'([\s\S]*)', html) is_live = r1(r"window.videoIsLive=\'([s\S'\s\.]*)\'\;[\s\S]*window.resDomain", html) - if is_live is not "1": + if is_live != "1": raise ValueError("The live stream is not online! (Errno:%s)" % is_live) match = re.search(r""" From 2e5ced0700fe5cfff2f604bd9645158694b4e970 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 25 Apr 2019 15:35:35 +0200 Subject: [PATCH 237/271] [tiktok] fix extraction --- src/you_get/extractors/tiktok.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/tiktok.py b/src/you_get/extractors/tiktok.py index 9ecc662d..2c388054 100644 --- a/src/you_get/extractors/tiktok.py +++ b/src/you_get/extractors/tiktok.py @@ -6,12 +6,10 @@ from ..common import * def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url, faker=True) - title = r1(r'(.*?)', html) + title = r1(r'(.*?)', html) video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html) title = '%s [%s]' % (title, video_id) - dataText = r1(r'var data = \[(.*)\] ', html) or r1(r'var data = (\{.*\})', html) - data = json.loads(dataText) - source = 'http:' + data['video']['play_addr']['url_list'][0] + source = r1(r' Date: Tue, 2 Jul 2019 10:46:34 +0800 Subject: [PATCH 257/271] Fix the parser issue for acfun videos. --- src/you_get/extractors/acfun.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 88363ed5..3dacedf5 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -113,11 +113,14 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url): html = get_content(url) - title = r1(r'data-title="([^"]+)"', html) - if match1(url, r'_(\d+)$'): # current P - title = title + " " + r1(r'active">([^<]*)', html) - vid = r1('data-vid="(\d+)"', html) - up = r1('data-name="([^"]+)"', html) + json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});") + json_data = json.loads(json_text) + vid = json_data.get('currentVideoInfo').get('id') + up = json_data.get('user').get('name') + title = json_data.get('title') + video_list = json_data.get('videoList') + if len(video_list) > 1: + title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0] # bangumi elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url): html = get_content(url) From 80671b86ae2e25d9d3a6b3ce46f3e49b10293368 Mon Sep 17 00:00:00 2001 From: Tianran YAO <48848200+yaotianran@users.noreply.github.com> Date: Tue, 16 Jul 2019 09:45:54 +0800 Subject: [PATCH 258/271] changed youku ccode to 0519 --- src/you_get/extractors/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 75a49c70..d7107eca 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0590' + self.ccode = '0519' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' From 179bbeaa5e46048cc963d1c4923fcf2893e76908 Mon Sep 17 00:00:00 2001 From: laiqing Date: Tue, 16 Jul 2019 12:48:55 +0800 Subject: [PATCH 259/271] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=A5=BF=E7=93=9C?= =?UTF-8?q?=E8=A7=86=E9=A2=91=E8=8E=B7=E5=8F=96=E4=B8=8D=E5=88=B0video=20i?= =?UTF-8?q?d=E5=92=8Ctitle=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/extractors/ixigua.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 3cf07b09..164161cc 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -81,8 +81,8 @@ def get_video_url_from_video_id(video_id): def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422 html = get_html(url, faker=True) - video_id = match1(html, r"videoId\s*:\s*'([^']+)'") - title = match1(html, r"title: '(\S+)',") + video_id = match1(html, r"\"vid\":\"([^\"]+)") + title = match1(html, r"\"title\":\"(\S+?)\",") if not video_id: log.e("video_id not found, url:{}".format(url)) return From 151938e1aa28c6d88ffca1e0edeff287239d7e26 Mon Sep 17 00:00:00 2001 From: qiaoruntao <925783095@qq.com> Date: Tue, 16 Jul 2019 13:36:08 +0800 Subject: [PATCH 260/271] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=BF=AB=E6=89=8B?= =?UTF-8?q?=E7=9B=B4=E6=92=AD=E6=97=A0=E6=B3=95=E4=B8=8B=E8=BD=BD=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/extractors/kuaishou.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/kuaishou.py b/src/you_get/extractors/kuaishou.py index a21f8ffa..917920d1 100644 --- a/src/you_get/extractors/kuaishou.py +++ b/src/you_get/extractors/kuaishou.py @@ -16,11 +16,14 @@ def kuaishou_download_by_url(url, info_only=False, **kwargs): # size = video_list[-1]['size'] # result wrong size try: - og_video_url = re.search(r"", page).group(1) - video_url = og_video_url - title = url.split('/')[-1] + search_result=re.search(r"\"playUrls\":\[(\{\"quality\"\:\"\w+\",\"url\":\".*?\"\})+\]", page) + all_video_info_str = search_result.group(1) + all_video_infos=re.findall(r"\{\"quality\"\:\"(\w+)\",\"url\":\"(.*?)\"\}", all_video_info_str) + # get the one of the best quality + video_url = all_video_infos[0][1].encode("utf-8").decode('unicode-escape') + title = re.search(r"(.*?)", page).group(1) size = url_size(video_url) - video_format = video_url.split('.')[-1] + video_format = "flv"#video_url.split('.')[-1] print_info(site_info, title, video_format, size) if not info_only: download_urls([video_url], title, video_format, size, **kwargs) From d3d397178fb2b24a7d0aabf11a995e293c968ebf Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 25 Jul 2019 23:08:23 +0800 Subject: [PATCH 261/271] [common] update UA --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index c3962431..24681b22 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -144,7 +144,7 @@ fake_headers = { 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0', # noqa + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0', # noqa } if sys.stdout.isatty(): From e36675ce72f120ab7d053eeaab9c1268e11ed18e Mon Sep 17 00:00:00 2001 From: Ensteinjun Date: Tue, 30 Jul 2019 15:09:55 +0800 Subject: [PATCH 262/271] Fix bug: unable get video title --- src/you_get/extractors/youtube.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index eea31503..976d270b 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -207,8 +207,7 @@ class YouTube(VideoExtractor): raise elif video_info['status'] == ['ok']: if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']: - self.title = parse.unquote_plus(video_info['title'][0]) - + self.title = parse.unquote_plus(json.loads(video_info["player_response"][0])["videoDetails"]["title"]) # Parse video page (for DASH) video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) try: @@ -229,7 +228,7 @@ class YouTube(VideoExtractor): video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) - self.title = ytplayer_config['args']['title'] + self.title = json.loads(ytplayer_config["args"]["player_response"])["videoDetails"]["title"] self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') From 648487b9a256ffa1d9ba91758e0c8afe8409fb9b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 2 Aug 2019 13:30:10 +0200 Subject: [PATCH 263/271] version 0.4.1328 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index a31efa48..48bf3b5f 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1314' +__version__ = '0.4.1328' From 0fe204ad06cd96726ad9f770936ef961ad8bb12c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Aug 2019 10:31:49 +0200 Subject: [PATCH 264/271] [youtube] warn when target URL is from a playlist and --playlist is not used --- src/you_get/extractors/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 976d270b..4483f8eb 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -195,6 +195,9 @@ class YouTube(VideoExtractor): self.download_playlist_by_url(self.url, **kwargs) exit(0) + if re.search('\Wlist=', self.url) and not kwargs.get('playlist'): + log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)') + # Get video info # 'eurl' is a magic parameter that can bypass age restriction # full form: 'eurl=https%3A%2F%2Fyoutube.googleapis.com%2Fv%2F{VIDEO_ID}' From 2a1db6e79c5465808b908d4b3b6c874c0d7a7cc9 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Aug 2019 13:53:57 +0200 Subject: [PATCH 265/271] [panda] purge --- src/you_get/common.py | 1 - src/you_get/extractors/__init__.py | 1 - src/you_get/extractors/panda.py | 43 ------------------------------ 3 files changed, 45 deletions(-) delete mode 100644 src/you_get/extractors/panda.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 24681b22..b2bca0a5 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -86,7 +86,6 @@ SITES = { 'naver' : 'naver', '7gogo' : 'nanagogo', 'nicovideo' : 'nicovideo', - 'panda' : 'panda', 'pinterest' : 'pinterest', 'pixnet' : 'pixnet', 'pptv' : 'pptv', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 0c4cccc7..2961f015 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -51,7 +51,6 @@ from .nanagogo import * from .naver import * from .netease import * from .nicovideo import * -from .panda import * from .pinterest import * from .pixnet import * from .pptv import * diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py deleted file mode 100644 index c9af4b38..00000000 --- a/src/you_get/extractors/panda.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['panda_download'] - -from ..common import * -from ..util.log import * -import json -import time - -def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - roomid = re.search('/(\d+)', url) - if roomid is None: - log.wtf('Cannot found room id for this url') - roomid = roomid.group(1) - json_request_url ="http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}".format(roomid, int(time.time())) - content = get_html(json_request_url) - api_json = json.loads(content) - - errno = api_json["errno"] - errmsg = api_json["errmsg"] - if errno: - raise ValueError("Errno : {}, Errmsg : {}".format(errno, errmsg)) - data = api_json["data"] - title = data["roominfo"]["name"] - room_key = data["videoinfo"]["room_key"] - plflag = data["videoinfo"]["plflag"].split("_") - status = data["videoinfo"]["status"] - if status != "2": - raise ValueError("The live stream is not online! (status:%s)" % status) - - data2 = json.loads(data["videoinfo"]["plflag_list"]) - rid = data2["auth"]["rid"] - sign = data2["auth"]["sign"] - ts = data2["auth"]["time"] - real_url = "http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}".format(plflag[1], room_key, sign, ts, rid) - - print_info(site_info, title, 'flv', float('inf')) - if not info_only: - download_urls([real_url], title, 'flv', None, output_dir, merge = merge) - -site_info = "panda.tv" -download = panda_download -download_playlist = playlist_not_supported('panda') From 4ca4a51507ce2450f22f7ad2bf000fcfd48b06ac Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Aug 2019 15:53:37 +0200 Subject: [PATCH 266/271] [twitter] disable faker to prevent 302 infinite redirect --- src/you_get/extractors/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index ec1b06af..4b239e67 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -34,7 +34,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) **kwargs) return - html = get_html(url, faker=True) + html = get_html(url, faker=False) # disable faker to prevent 302 infinite redirect screen_name = r1(r'twitter\.com/([^/]+)', url) or r1(r'data-screen-name="([^"]*)"', html) or \ r1(r' Date: Mon, 5 Aug 2019 09:52:38 +0200 Subject: [PATCH 267/271] avalable -> available --- src/you_get/extractors/mgtv.py | 2 +- src/you_get/extractors/qie.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/mgtv.py b/src/you_get/extractors/mgtv.py index 730dfeba..657167a6 100644 --- a/src/you_get/extractors/mgtv.py +++ b/src/you_get/extractors/mgtv.py @@ -68,7 +68,7 @@ class MGTV(VideoExtractor): self.title = content['data']['info']['title'] domain = content['data']['stream_domain'][0] - #stream_avalable = [i['name'] for i in content['data']['stream']] + #stream_available = [i['name'] for i in content['data']['stream']] stream_available = {} for i in content['data']['stream']: stream_available[i['name']] = i['url'] diff --git a/src/you_get/extractors/qie.py b/src/you_get/extractors/qie.py index 38f703ed..08d462bf 100644 --- a/src/you_get/extractors/qie.py +++ b/src/you_get/extractors/qie.py @@ -58,7 +58,7 @@ class QiE(VideoExtractor): content = loads(content) self.title = content['data']['room_name'] rtmp_url = content['data']['rtmp_url'] - #stream_avalable = [i['name'] for i in content['data']['stream']] + #stream_available = [i['name'] for i in content['data']['stream']] stream_available = {} stream_available['normal'] = rtmp_url + '/' + content['data']['rtmp_live'] if len(content['data']['rtmp_multi_bitrate']) > 0: From 2b80c6de9dad29fdd4a07633e741950fe51738f7 Mon Sep 17 00:00:00 2001 From: Semen Zhydenko Date: Mon, 5 Aug 2019 09:53:01 +0200 Subject: [PATCH 268/271] basicly -> basically --- src/you_get/processor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 02ecb012..63679b83 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -220,7 +220,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.', stream=True): """str, str->True WARNING: NOT THE SAME PARMS AS OTHER FUNCTIONS!!!!!! - You can basicly download anything with this function + You can basically download anything with this function but better leave it alone with """ output = title + '.' + ext From 013e75c886160ded6ce62aea8db13860fd6e206c Mon Sep 17 00:00:00 2001 From: Semen Zhydenko Date: Mon, 5 Aug 2019 09:53:23 +0200 Subject: [PATCH 269/271] infomation -> information --- src/you_get/extractors/flickr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/flickr.py b/src/you_get/extractors/flickr.py index 4efa78ef..2535dd1c 100644 --- a/src/you_get/extractors/flickr.py +++ b/src/you_get/extractors/flickr.py @@ -74,7 +74,7 @@ def get_api_key(page): # this happens only when the url points to a gallery page # that contains no inline api_key(and never makes xhr api calls) # in fact this might be a better approch for getting a temporary api key - # since there's no place for a user to add custom infomation that may + # since there's no place for a user to add custom information that may # misguide the regex in the homepage if not match: return match1(get_html('https://flickr.com'), pattern_inline_api_key) From be931a5416ad170ae719fb9d7e6b0f7b1b8ceef4 Mon Sep 17 00:00:00 2001 From: Semen Zhydenko Date: Mon, 5 Aug 2019 09:53:54 +0200 Subject: [PATCH 270/271] Unkown -> Unknown --- src/you_get/extractors/icourses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py index ec70f64c..606e21e6 100644 --- a/src/you_get/extractors/icourses.py +++ b/src/you_get/extractors/icourses.py @@ -110,7 +110,7 @@ def icourses_playlist_download(url, output_dir='.', **kwargs): video_list = re.findall(resid_courseid_patt, page) if not video_list: - raise Exception('Unkown url pattern') + raise Exception('Unknown url pattern') for video in video_list: video_url = change_for_video_ip.format(video[0], video[1]) From aa151acaa3ee2ecb4a603aa82f87019a42dc2508 Mon Sep 17 00:00:00 2001 From: lxfly2000 Date: Sat, 10 Aug 2019 19:31:29 +0800 Subject: [PATCH 271/271] Fix AcFun Bangumi download. --- README.md | 2 +- src/you_get/extractors/acfun.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index e1551c9a..360b5d0b 100644 --- a/README.md +++ b/README.md @@ -403,7 +403,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | **niconico
ニコニコ動画** | |✓| | | | **163
网易视频
网易云音乐** |
|✓| |✓| | 56网 | |✓| | | -| **AcFun** | |✓| | | +| **AcFun** | |✓| | | | **Baidu
百度贴吧** | |✓|✓| | | 爆米花网 | |✓| | | | **bilibili
哔哩哔哩** | |✓| | | diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 3dacedf5..61f6cae8 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -124,10 +124,11 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): # bangumi elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url): html = get_content(url) - title = match1(html, r'"title"\s*:\s*"([^"]+)"') - if match1(url, r'_(\d+)$'): # current P - title = title + " " + r1(r'active">([^<]*)', html) - vid = match1(html, r'videoId="(\d+)"') + tag_script = match1(html, r'') + json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1] + json_data = json.loads(json_text) + title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title'] + vid = str(json_data['videoId']) up = "acfun" else: raise NotImplemented @@ -148,6 +149,6 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): **kwargs) -site_info = "AcFun.tv" +site_info = "AcFun.cn" download = acfun_download download_playlist = playlist_not_supported('acfun')