From 6fa81497d539a99fda2636126df3e485179a4edd Mon Sep 17 00:00:00 2001 From: MaxwellGoblin Date: Thu, 20 Jul 2017 17:14:05 +0800 Subject: [PATCH 001/271] [youtube]use mp4_audio track when no audio track for webm --- src/you_get/extractors/youtube.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 986906d6..3b412dc2 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -366,14 +366,22 @@ class YouTube(VideoExtractor): dash_url += '&signature={}'.format(sig) dash_size = stream['clen'] itag = stream['itag'] + audio_url = None + audio_size = None + try: + audio_url = dash_webm_a_url + audio_size = int(dash_webm_a_size) + except UnboundLocalError as e: + audio_url = dash_mp4_a_url + audio_size = int(dash_mp4_a_size) self.dash_streams[itag] = { 'quality': stream['size'], 'itag': itag, 'type': mimeType, 'mime': mimeType, 'container': 'webm', - 'src': [dash_url, dash_webm_a_url], - 'size': int(dash_size) + int(dash_webm_a_size) + 'src': [dash_url, audio_url], + 'size': int(dash_size) + int(audio_size) } def extract(self, **kwargs): From a1290be2ec14943c6a77cbb4d80c561083cee44e Mon Sep 17 00:00:00 2001 From: hellsof Date: Wed, 3 Jan 2018 11:43:26 +0800 Subject: [PATCH 002/271] fix https://v.qq.com/x/page/q0527wsyqpv.html --- src/you_get/extractors/qq.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 438c1f24..199df921 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -149,6 +149,8 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): vid = "" if rurl: vid = rurl.split('/')[-1].split('.')[0] + if vid == "undefined": + vid = "" vid = vid if vid else url.split('/')[-1].split('.')[0] #https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html? vid = vid if vid else match1(content, r'vid"*\s*:\s*"\s*([^"]+)"') #general fallback if not vid: From fb2c9368933d4ba59a622c65765c2368fd12c19d Mon Sep 17 00:00:00 2001 From: Justsoos Date: Mon, 19 Feb 2018 22:00:55 +0800 Subject: [PATCH 003/271] Add longzhu.com --- src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/longzhu.py | 73 ++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 src/you_get/extractors/longzhu.py diff --git a/src/you_get/common.py b/src/you_get/common.py index a4a036a4..4ea144f1 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -74,6 +74,7 @@ SITES = { 'le' : 'le', 'letv' : 'le', 'lizhi' : 'lizhi', + 'longzhu' : 'longzhu', 'magisto' : 'magisto', 'metacafe' : 'metacafe', 'mgtv' : 'mgtv', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 46e5c89c..ec9e86ae 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -41,6 +41,7 @@ from .kugou import * from .kuwo import * from .le import * from .lizhi import * +from .longzhu import * from .magisto import * from .metacafe import * from .mgtv import * diff --git a/src/you_get/extractors/longzhu.py b/src/you_get/extractors/longzhu.py new file mode 100644 index 00000000..ed0cb084 --- /dev/null +++ b/src/you_get/extractors/longzhu.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +__all__ = ['longzhu_download'] + +import json +from ..common import ( + get_content, + match1, + print_info, + download_urls, + playlist_not_supported, +) +from ..common import player + +def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs): + web_domain = url.split('/')[2] + if (web_domain == 'star.longzhu.com') or (web_domain == 'y.longzhu.com'): + domain = url.split('/')[3].split('?')[0] + m_url = 'http://m.longzhu.com/{0}'.format(domain) + m_html = get_content(m_url) + room_id_patt = r'var\s*roomId\s*=\s*(\d+);' + room_id = match1(m_html,room_id_patt) + + json_url = 'http://liveapi.plu.cn/liveapp/roomstatus?roomId={0}'.format(room_id) + content = get_content(json_url) + data = json.loads(content) + streamUri = data['streamUri'] + if len(streamUri) <= 4: + raise ValueError('The live stream is not online!') + title = data['title'] + streamer = data['userName'] + title = str.format(streamer,': ',title) + + steam_api_url = 'http://livestream.plu.cn/live/getlivePlayurl?roomId={0}'.format(room_id) + content = get_content(steam_api_url) + data = json.loads(content) + isonline = data.get('isTransfer') + if isonline == '0': + raise ValueError('The live stream is not online!') + + real_url = data['playLines'][0]['urls'][0]['securityUrl'] + + print_info(site_info, title, 'flv', float('inf')) + + if not info_only: + download_urls([real_url], title, 'flv', None, output_dir, merge=merge) + + elif web_domain == 'replay.longzhu.com': + videoid = match1(url, r'(\d+)$') + json_url = 'http://liveapi.longzhu.com/livereplay/getreplayfordisplay?videoId={0}'.format(videoid) + content = get_content(json_url) + data = json.loads(content) + + username = data['userName'] + title = data['title'] + title = str.format(username,':',title) + real_url = data['videoUrl'] + + if player: + print_info('Longzhu Video', title, 'm3u8', 0) + download_urls([real_url], title, 'm3u8', 0, output_dir, merge=merge) + else: + urls = general_m3u8_extractor(real_url) + print_info('Longzhu Video', title, 'm3u8', 0) + if not info_only: + download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) + + else: + raise ValueError('Wrong url or unsupported link ... {0}'.format(url)) + +site_info = 'longzhu.com' +download = longzhu_download +download_playlist = playlist_not_supported('longzhu') \ No newline at end of file From a22dce896cd13ca398f748b93c73ea172b837df1 Mon Sep 17 00:00:00 2001 From: John128217 <35967907+John128217@users.noreply.github.com> Date: Wed, 21 Feb 2018 14:23:21 -0800 Subject: [PATCH 004/271] Added an auto rename option and fixed the force option I've noticed that if I am downloading two videos with same names from youtube (e.g. https://www.youtube.com/watch?v=606hmlA_nxw and https://www.youtube.com/watch?v=CLrXTnggUeY), only one of them will be saved (usually the bigger one according to the original script "os.path.getsize(output_filepath) >= total_size * 0.9"). However, I want them both while preserving their names from youtube. So I modified the source code. It looks like there are a lot of changes, but I just added an indent and everything changed. Also, I've noticed that "force" is not working at all. I fixed that issue. --- src/you_get/common.py | 63 +++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index a4a036a4..76cf5b97 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -134,6 +134,7 @@ player = None extractor_proxy = None cookies = None output_filename = None +auto_rename = False fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa @@ -598,27 +599,40 @@ def url_save( tmp_headers['Referer'] = refer file_size = url_size(url, faker=faker, headers=tmp_headers) - if os.path.exists(filepath): - if not force and file_size == os.path.getsize(filepath): - if not is_part: - if bar: - bar.done() - print( - 'Skipping {}: file already exists'.format( - tr(os.path.basename(filepath)) + continue_renameing = True + while continue_renameing: + continue_renameing = False + if os.path.exists(filepath): + if not force and file_size == os.path.getsize(filepath): + if not is_part: + if bar: + bar.done() + print( + 'Skipping {}: file already exists'.format( + tr(os.path.basename(filepath)) + ) ) - ) + else: + if bar: + bar.update_received(file_size) + return else: - if bar: - bar.update_received(file_size) - return - else: - if not is_part: - if bar: - bar.done() - print('Overwriting %s' % tr(os.path.basename(filepath)), '...') - elif not os.path.exists(os.path.dirname(filepath)): - os.mkdir(os.path.dirname(filepath)) + if not is_part: + if bar: + bar.done() + if not force and auto_rename: + path, ext = os.path.basename(filepath).rsplit('.', 1) + if (re.compile(' \(\d\)').match(path[-4:]) is None): + thisfile = path + ' (1).' + ext + else: + thisfile = path[:-2] + str(int(path[-2]) + 1) + ').' + ext + filepath = os.path.join(os.path.dirname(filepath), thisfile) + print('Changing name to %s' % tr(os.path.basename(filepath)), '...') + continue_renameing = True + continue + print('Overwriting %s' % tr(os.path.basename(filepath)), '...') + elif not os.path.exists(os.path.dirname(filepath)): + os.mkdir(os.path.dirname(filepath)) temp_filepath = filepath + '.download' if file_size != float('inf') \ else filepath @@ -883,7 +897,7 @@ def download_urls( output_filepath = os.path.join(output_dir, output_filename) if total_size: - if not force and os.path.exists(output_filepath) \ + if not force and os.path.exists(output_filepath) and not auto_rename\ and os.path.getsize(output_filepath) >= total_size * 0.9: print('Skipping %s: file already exists' % output_filepath) print() @@ -1370,6 +1384,10 @@ def script_main(download, download_playlist, **kwargs): '-l', '--playlist', action='store_true', help='Prefer to download a playlist' ) + download_grp.add_argument( + '-a', '--auto-rename', action='store_true', default=False, + help='Auto rename same name different files' + ) proxy_grp = parser.add_argument_group('Proxy options') proxy_grp = proxy_grp.add_mutually_exclusive_group() @@ -1414,11 +1432,16 @@ def script_main(download, download_playlist, **kwargs): global player global extractor_proxy global output_filename + global auto_rename output_filename = args.output_filename extractor_proxy = args.extractor_proxy info_only = args.info + if args.force: + force = True + if args.auto_rename: + auto_rename = True if args.url: dry_run = True if args.json: From c7290c7c2fa5bbf9c4623cf5694b742212d74df7 Mon Sep 17 00:00:00 2001 From: John128217 <35967907+John128217@users.noreply.github.com> Date: Wed, 21 Feb 2018 21:55:28 -0800 Subject: [PATCH 005/271] A number bug fixed Now if you can have more than 10 videos that have same names. --- src/you_get/common.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 76cf5b97..0d1b1810 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -622,10 +622,13 @@ def url_save( bar.done() if not force and auto_rename: path, ext = os.path.basename(filepath).rsplit('.', 1) - if (re.compile(' \(\d\)').match(path[-4:]) is None): + finder = re.compile(' \([1-9]\d*?\)$') + if (finder.search(path) is None): thisfile = path + ' (1).' + ext else: - thisfile = path[:-2] + str(int(path[-2]) + 1) + ').' + ext + def numreturn(a): + return ' (' + str(int(a.group()[2:-1]) + 1) + ').' + thisfile = finder.sub(numreturn, path) + ext filepath = os.path.join(os.path.dirname(filepath), thisfile) print('Changing name to %s' % tr(os.path.basename(filepath)), '...') continue_renameing = True From b4850f5a5907a263d36d7d78e231f86e2321bd4c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Feb 2018 17:21:43 +0100 Subject: [PATCH 006/271] [common] indent! --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 4ea144f1..b4d57841 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -74,7 +74,7 @@ SITES = { 'le' : 'le', 'letv' : 'le', 'lizhi' : 'lizhi', - 'longzhu' : 'longzhu', + 'longzhu' : 'longzhu', 'magisto' : 'magisto', 'metacafe' : 'metacafe', 'mgtv' : 'mgtv', From 0b50fdfab430cff3b1e02c17def260ae0a5b47a3 Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Wed, 28 Feb 2018 16:45:48 +0800 Subject: [PATCH 007/271] [ixigua] fix URL request error and video download error and video encryption parameters acquisition --- src/you_get/extractors/ixigua.py | 40 ++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index aaed195d..0c668e82 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -5,30 +5,35 @@ import random import binascii from ..common import * -def get_video_id(text): - re_id = r"videoId: '(.*?)'" - return re.findall(re_id, text)[0] +headers = { + 'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36' + ' (KHTML, like Gecko) Chrome/61.0.3163.100 Mobile Safari/537.36' +} + def get_r(): return str(random.random())[2:] + def right_shift(val, n): return val >> n if val >= 0 else (val + 0x100000000) >> n + def get_s(text): """get video info""" - id = get_video_id(text) + js_data = json.loads(text) + id = js_data['data']['video_id'] p = get_r() url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % id n = parse.urlparse(url).path + '?r=%s' % p c = binascii.crc32(n.encode('utf-8')) s = right_shift(c, 0) - title = ''.join(re.findall(r"title: '(.*?)',", text)) - return url + '?r=%s&s=%s' % (p, s), title + return url + '?r=%s&s=%s' % (p, s), js_data['data']['title'] + def get_moment(url, user_id, base_url, video_list): """Recursively obtaining a video list""" - video_list_data = json.loads(get_content(url)) + video_list_data = json.loads(get_content(url, headers=headers)) if not video_list_data['next']['max_behot_time']: return video_list [video_list.append(i["display_url"]) for i in video_list_data["data"]] @@ -41,23 +46,33 @@ def get_moment(url, user_id, base_url, video_list): } return get_moment(**_param) + def ixigua_download(url, output_dir='.', info_only=False, **kwargs): """ Download a single video Sample URL: https://www.ixigua.com/a6487187567887254029/#mid=59051127876 """ try: - video_info_url, title = get_s(get_content(url)) - video_info = json.loads(get_content(video_info_url)) + video_page_id = re.findall('(\d+)', [i for i in url.split('/') if i][3])[0] if 'toutiao.com' in url \ + else re.findall('(\d+)', [i for i in url.split('/') if i][2])[0] + + video_start_info_url = r'https://m.ixigua.com/i{}/info/'.format(video_page_id) + video_info_url, title = get_s(get_content(video_start_info_url, headers=headers or kwargs.get('headers', {}))) + video_info = json.loads(get_content(video_info_url, headers=headers or kwargs.get('headers', {}))) except Exception: raise NotImplementedError(url) try: video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"]).decode() except Exception: raise NotImplementedError(url) - filetype, ext, size = url_info(video_url) + filetype, ext, size = url_info(video_url, headers=headers or kwargs.get('headers', {})) print_info(site_info, title, filetype, size) if not info_only: - download_urls([video_url], title, ext, size, output_dir=output_dir) + _param = { + 'output_dir': output_dir, + 'headers': headers or kwargs.get('headers', {}) + } + download_urls([video_url], title, ext, size, **_param) + def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): """Download all video from the user's video list @@ -80,6 +95,7 @@ def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): for i in get_moment(**_param): ixigua_download(i, output_dir, info_only, **kwargs) + site_info = "ixigua.com" download = ixigua_download -download_playlist = ixigua_download_playlist \ No newline at end of file +download_playlist = ixigua_download_playlist From 7633898850f6ed30c78e1fb5bdb0f96b81d9d87a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 1 Mar 2018 22:55:09 +0100 Subject: [PATCH 008/271] version 0.4.1040 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 2d4ff9d0..7e220d0d 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1025' +__version__ = '0.4.1040' From 1900f7608cc2756d5460c99eb792c8e0eb42e7f4 Mon Sep 17 00:00:00 2001 From: mq-liu Date: Wed, 7 Mar 2018 09:48:11 +0800 Subject: [PATCH 009/271] fix bilibili download fail the bilibili api has changed "https://interface.bilibili.com/v2/playurl?cid=33250486&appkey=84956560bc028eb7&otype=json&type=&quality=0&qn=0&sign=a1b0401c8bf70d676bab133fa032469f" --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index e5abccab..046d2cb1 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -23,7 +23,7 @@ from .youku import youku_download_by_vid class Bilibili(VideoExtractor): name = 'Bilibili' live_api = 'http://live.bilibili.com/api/playurl?cid={}&otype=json' - api_url = 'http://interface.bilibili.com/playurl?' + api_url = 'http://interface.bilibili.com/v2/playurl?' bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?' live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}' live_room_info_api_url = 'https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}' From 92eb72bc7d20370e2835ed78dad94c0accaa068a Mon Sep 17 00:00:00 2001 From: Kugel-Blitz <21170940+Kugel-Blitz@users.noreply.github.com> Date: Sun, 11 Mar 2018 10:06:19 +1300 Subject: [PATCH 010/271] Use 0513 when cookies are used 0507 doesn't seem to honour cookies when they're loaded. --- src/you_get/extractors/youku.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 9d74b9c8..d40d16c1 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,10 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0513' + if cookies: + self.ccode = '0513' + else: + self.ccode = '0507' self.utid = None def youku_ups(self): From 344502af0d223def2a9ed0fde3766f6f7490b23b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 15 Mar 2018 22:38:23 +0100 Subject: [PATCH 011/271] [youku] resolve conflict --- src/you_get/extractors/youku.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 099552cf..d40d16c1 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,10 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0507' + if cookies: + self.ccode = '0513' + else: + self.ccode = '0507' self.utid = None def youku_ups(self): From fdf53508388135917bb976319b2be01b96034634 Mon Sep 17 00:00:00 2001 From: Phun Date: Mon, 19 Mar 2018 12:48:38 +0800 Subject: [PATCH 012/271] fix the bug of v.qq.com --- src/you_get/extractors/qq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 89dd7b61..5591e3eb 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -122,9 +122,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): return #do redirect - if 'v.qq.com/page' in url: + if 'v.qq.com/x' in url: # for URLs like this: - # http://v.qq.com/page/k/9/7/k0194pwgw97.html + # https://v.qq.com/x/page/r05533mns3s.html new_url = url_locations([url])[0] if url == new_url: #redirect in js? From 3faaebb6762ff1bbea1e9b45b6dd348a92ddbcfc Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Mar 2018 22:40:07 +0100 Subject: [PATCH 013/271] [qq] no more redirect (close #2586) --- src/you_get/extractors/qq.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 5591e3eb..ffca5a85 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -121,18 +121,6 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): qq_download_by_vid(vid, vid, output_dir, merge, info_only) return - #do redirect - if 'v.qq.com/x' in url: - # for URLs like this: - # https://v.qq.com/x/page/r05533mns3s.html - new_url = url_locations([url])[0] - if url == new_url: - #redirect in js? - content = get_content(url) - url = match1(content,r'window\.location\.href="(.*?)"') - else: - url = new_url - if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): content = get_content(url) vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') From 8979cd63eaa0979f249e4132a10b3706c4952b02 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Mar 2018 22:44:33 +0100 Subject: [PATCH 014/271] [qq] break if no pay --- src/you_get/extractors/qq.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index ffca5a85..7b1a6860 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -47,6 +47,9 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): else: log.w(key_json['msg']) break + if key_json.get('filename') is None: + log.w(key_json['msg']) + break part_urls.append(url) _, ext, size = url_info(url) From 821e639e025296b4c041d0535ca4d95ad72ea397 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Mar 2018 22:46:12 +0100 Subject: [PATCH 015/271] [youku] boom boom boom --- src/you_get/extractors/youku.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index d40d16c1..bc4d8088 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,10 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - if cookies: - self.ccode = '0513' - else: - self.ccode = '0507' + self.ccode = '0590' self.utid = None def youku_ups(self): From d3719ed4b62be2697e18755bcda7cb2249c8d7c1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 24 Mar 2018 23:56:38 +0100 Subject: [PATCH 016/271] [bilibili] warn when target URL is a playlist --- src/you_get/extractors/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 046d2cb1..7e5bdb37 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -167,8 +167,8 @@ class Bilibili(VideoExtractor): qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) return - has_plist = re.search(r' Date: Mon, 26 Mar 2018 12:05:12 +0200 Subject: [PATCH 017/271] from ..common import general_m3u8_extractor Import the definition of __general_m3u8_extractor()__ for the function call on line 64... flake8 testing of https://github.com/soimort/you-get on Python 3.6.3 $ __flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics__ ``` ./src/you_get/extractors/longzhu.py:63:20: F821 undefined name 'general_m3u8_extractor' urls = general_m3u8_extractor(real_url) ^ 1 F821 undefined name 'general_m3u8_extractor' ``` --- src/you_get/extractors/longzhu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/longzhu.py b/src/you_get/extractors/longzhu.py index ed0cb084..29b340c5 100644 --- a/src/you_get/extractors/longzhu.py +++ b/src/you_get/extractors/longzhu.py @@ -5,6 +5,7 @@ __all__ = ['longzhu_download'] import json from ..common import ( get_content, + general_m3u8_extractor, match1, print_info, download_urls, @@ -70,4 +71,4 @@ def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwarg site_info = 'longzhu.com' download = longzhu_download -download_playlist = playlist_not_supported('longzhu') \ No newline at end of file +download_playlist = playlist_not_supported('longzhu') From 43923bc8f6c7df552e672a4e80aed0e58010964d Mon Sep 17 00:00:00 2001 From: JayXon Date: Thu, 29 Mar 2018 00:59:28 -0700 Subject: [PATCH 018/271] [youku] use default ckey 1080p works --- src/you_get/extractors/youku.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index bc4d8088..fc1a5cd2 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,10 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0590' + self.ccode = '0502' + # Found in http://g.alicdn.com/player/ykplayer/0.5.28/youku-player.min.js + # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js + self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' self.utid = None def youku_ups(self): @@ -86,6 +89,7 @@ class Youku(VideoExtractor): url += '&client_ip=192.168.1.1' url += '&utid=' + self.utid url += '&client_ts=' + str(int(time.time())) + url += '&ckey=' + urllib.parse.quote(self.ckey) if self.password_protected: url += '&password=' + self.password headers = dict(Referer=self.referer) From 6a9039aab110f40ba6a4fed5915d58cffee8aa46 Mon Sep 17 00:00:00 2001 From: hellsof Date: Wed, 11 Apr 2018 19:59:14 +0800 Subject: [PATCH 019/271] fix https://v.qq.com/x/page/d0552xbadkl.html https://y.qq.com/n/yqq/mv/v/g00268vlkzy.html --- src/you_get/extractors/qq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 6a859b8b..915f1b4b 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -140,7 +140,8 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): vid = "" if rurl: vid = rurl.split('/')[-1].split('.')[0] - if vid == "undefined": + # https://v.qq.com/x/page/d0552xbadkl.html https://y.qq.com/n/yqq/mv/v/g00268vlkzy.html + if vid == "undefined" or vid == "index": vid = "" vid = vid if vid else url.split('/')[-1].split('.')[0] #https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html? vid = vid if vid else match1(content, r'vid"*\s*:\s*"\s*([^"]+)"') #general fallback From ead0979ca133e75b62835c3d3ac2783955534a0f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 12 Apr 2018 03:19:12 +0200 Subject: [PATCH 020/271] [universal] relative path lah --- src/you_get/extractors/universal.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 633cf55b..b6bb68b1 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -67,6 +67,14 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg urls += re.findall(r'href="(https?://[^"]+\.png)"', page, re.I) urls += re.findall(r'href="(https?://[^"]+\.gif)"', page, re.I) + # relative path + rel_urls = [] + rel_urls += re.findall(r'href="(\.[^"]+\.jpe?g)"', page, re.I) + rel_urls += re.findall(r'href="(\.[^"]+\.png)"', page, re.I) + rel_urls += re.findall(r'href="(\.[^"]+\.gif)"', page, re.I) + for rel_url in rel_urls: + urls += [ r1(r'(.*/)', url) + rel_url ] + # MPEG-DASH MPD mpd_urls = re.findall(r'src="(https?://[^"]+\.mpd)"', page) for mpd_url in mpd_urls: From 25aa2ac2e5f5b408edfc53f64a6706a716f0e0c3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Apr 2018 16:34:15 +0200 Subject: [PATCH 021/271] [universal] better extraction of title and ext --- src/you_get/extractors/universal.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index b6bb68b1..e343d4cd 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -112,10 +112,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg else: # direct download - filename = parse.unquote(url.split('/')[-1]) - title = '.'.join(filename.split('.')[:-1]) - ext = filename.split('.')[-1] - _, _, size = url_info(url, faker=True) + filename = parse.unquote(url.split('/')[-1]) or parse.unquote(url.split('/')[-2]) + title = '.'.join(filename.split('.')[:-1]) or filename + _, ext, size = url_info(url, faker=True) print_info(site_info, title, ext, size) if not info_only: download_urls([url], title, ext, size, From c77f29861c27725811c54285f351fc120279d75c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Apr 2018 17:07:46 +0200 Subject: [PATCH 022/271] [universal] support Open Graph og:video:url --- src/you_get/extractors/universal.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index e343d4cd..573d8eea 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -31,6 +31,19 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg if page_title: page_title = unescape_html(page_title) + meta_videos = re.findall(r' Date: Sun, 15 Apr 2018 17:13:08 +0200 Subject: [PATCH 023/271] version 0.4.1060 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 7e220d0d..4d91c55d 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1040' +__version__ = '0.4.1060' From b28d78f71d50369cb6d306ef3e68430dedf86f1a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 19 Apr 2018 03:22:18 +0200 Subject: [PATCH 024/271] [twitter] support twitter moments --- src/you_get/extractors/twitter.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 9c5acb31..894439aa 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -18,6 +18,17 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) if re.match(r'https?://mobile', url): # normalize mobile URL url = 'https://' + match1(url, r'//mobile\.(.+)') + if re.match(r'https?://twitter\.com/i/moments/', url): # moments + html = get_html(url) + paths = re.findall(r'data-permalink-path="([^"]+)"', html) + for path in paths: + twitter_download('https://twitter.com' + path, + output_dir=output_dir, + merge=merge, + info_only=info_only, + **kwargs) + return + html = get_html(url) screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ r1(r' Date: Sun, 22 Apr 2018 10:38:40 +0800 Subject: [PATCH 025/271] fix bilibili update xml-url to get all format of video --- src/you_get/extractors/bilibili.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 7e5bdb37..d23bbe5c 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -28,7 +28,8 @@ class Bilibili(VideoExtractor): live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}' live_room_info_api_url = 'https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}' - SEC1 = '1c15888dc316e05a15fdd0a02ed6584f' + #SEC1 = '1c15888dc316e05a15fdd0a02ed6584f' + SEC1 = '94aba54af9065f71de72f5508f1cd42e' SEC2 = '9b288147e5474dd2aa67085f716c560d' stream_types = [ {'id': 'hdflv'}, @@ -44,7 +45,7 @@ class Bilibili(VideoExtractor): @staticmethod def bilibili_stream_type(urls): url = urls[0] - if 'hd.flv' in url or '-112.flv' in url: + if 'hd.flv' in url or '-80.flv' in url: return 'hdflv', 'flv' if '-64.flv' in url: return 'flv720', 'flv' @@ -59,7 +60,8 @@ class Bilibili(VideoExtractor): def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs): ts = str(int(time.time())) if not bangumi: - params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts) + #params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts) + params_str = 'appkey=84956560bc028eb7&cid={}&otype=xml&qn={}&quality={}&type='.format(cid, quality, quality) chksum = hashlib.md5(bytes(params_str+self.SEC1, 'utf8')).hexdigest() api_url = self.api_url + params_str + '&sign=' + chksum else: @@ -97,7 +99,7 @@ class Bilibili(VideoExtractor): quality = 'hdflv' if bangumi else 'flv' info_only = kwargs.get('info_only') - for qlt in range(4, -1, -1): + for qlt in [116,112,80,74,64,32,16,15]: api_xml = self.api_req(cid, qlt, bangumi, **kwargs) self.parse_bili_xml(api_xml) if not info_only or stream_id: From d057a49e5b4222cb69b47008cb9e0af1b6b0209f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 24 Apr 2018 14:46:38 +0200 Subject: [PATCH 026/271] [common] url_to_module: quote non-ASCII characters in URL --- src/you_get/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 11200d10..1a6cac2b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1581,6 +1581,9 @@ def url_to_module(url): domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host assert domain, 'unsupported url: ' + url + # all non-ASCII code points must be quoted (percent-encoded UTF-8) + url = ''.join([ch if ord(ch) in range(128) else parse.quote(ch) for ch in url]) + k = r1(r'([^.]+)', domain) if k in SITES: return ( From ff6deaf2bde4a8e81094c7ff5893fa4d9b30efb0 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 24 Apr 2018 14:48:20 +0200 Subject: [PATCH 027/271] [tumblr] fallback to universal_download --- src/you_get/extractors/tumblr.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index 5817e548..fe4973be 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -70,6 +70,11 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): real_url = r1(r']+tumblr_video_container[^>]+>]+src=[\'"]([^\'"]*)[\'"]', html) + + if iframe_url is None: + universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) + return + if iframe_url: iframe_html = get_content(iframe_url, headers=fake_headers) real_url = r1(r']*>[\n ]*]+src=[\'"]([^\'"]*)[\'"]', iframe_html) From 44960677c4c315e479d3b2015582f98f32d40c48 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 25 Apr 2018 22:30:46 +0200 Subject: [PATCH 028/271] [common] use quoted video_host and video_url as well --- src/you_get/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 1a6cac2b..e3000854 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1583,6 +1583,8 @@ def url_to_module(url): # all non-ASCII code points must be quoted (percent-encoded UTF-8) url = ''.join([ch if ord(ch) in range(128) else parse.quote(ch) for ch in url]) + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) k = r1(r'([^.]+)', domain) if k in SITES: From fe34688d07872e18fa0127c969a4f05152e3342a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 25 Apr 2018 22:42:23 +0200 Subject: [PATCH 029/271] [universal] fix my brain damage since 2015 (2c7aa3b) --- src/you_get/extractors/universal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 573d8eea..57994b9c 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -112,7 +112,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg for candy in candies: try: mime, ext, size = url_info(candy['url'], faker=True) - if not size: size = float('Int') + if not size: size = float('Inf') except: continue else: From bcc98c5a5cfae4cd13487f0a51662ede35e746bd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 25 Apr 2018 22:59:39 +0200 Subject: [PATCH 030/271] [universal] use faker only if necessary --- src/you_get/extractors/universal.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 57994b9c..6a1c2d30 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -111,16 +111,25 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg for candy in candies: try: - mime, ext, size = url_info(candy['url'], faker=True) - if not size: size = float('Inf') + try: + mime, ext, size = url_info(candy['url'], faker=False) + assert size + except: + mime, ext, size = url_info(candy['url'], faker=True) + if not size: size = float('Inf') except: continue else: print_info(site_info, candy['title'], ext, size) if not info_only: - download_urls([candy['url']], candy['title'], ext, size, - output_dir=output_dir, merge=merge, - faker=True) + try: + download_urls([candy['url']], candy['title'], ext, size, + output_dir=output_dir, merge=merge, + faker=False) + except: + download_urls([candy['url']], candy['title'], ext, size, + output_dir=output_dir, merge=merge, + faker=True) return else: From 4f1b609d71a04672a2b95b4fb13f0ba486e57df0 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 28 Apr 2018 18:59:52 +0200 Subject: [PATCH 031/271] [bilibili] fix title --- src/you_get/extractors/bilibili.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index d23bbe5c..916782af 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -130,6 +130,9 @@ class Bilibili(VideoExtractor): m = re.search(r'(.*?)', self.page) or re.search(r'', self.page) if m is not None: self.title = m.group(1) + s = re.search(r'([^<]+)', m.group(1)) + if s: + self.title = unescape_html(s.group(1)) if self.title is None: m = re.search(r'property="og:title" content="([^"]+)"', self.page) if m is not None: From 18d3cf0eb424fa92473141c2af6a9d0183550a72 Mon Sep 17 00:00:00 2001 From: QYLGithub <15058342792@163.com> Date: Sun, 29 Apr 2018 11:38:49 +0800 Subject: [PATCH 032/271] Call toutiao.py method --- src/you_get/extractors/ixigua.py | 98 ++------------------------------ 1 file changed, 5 insertions(+), 93 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 0c668e82..bc19b1d0 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -1,101 +1,13 @@ #!/usr/bin/env python __all__ = ['ixigua_download', 'ixigua_download_playlist'] -import base64 -import random -import binascii -from ..common import * - -headers = { - 'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36' - ' (KHTML, like Gecko) Chrome/61.0.3163.100 Mobile Safari/537.36' -} +from .toutiao import download as toutiao_download +from .toutiao import download_playlist as toutiao_download_playlist -def get_r(): - return str(random.random())[2:] - - -def right_shift(val, n): - return val >> n if val >= 0 else (val + 0x100000000) >> n - - -def get_s(text): - """get video info""" - js_data = json.loads(text) - id = js_data['data']['video_id'] - p = get_r() - url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % id - n = parse.urlparse(url).path + '?r=%s' % p - c = binascii.crc32(n.encode('utf-8')) - s = right_shift(c, 0) - return url + '?r=%s&s=%s' % (p, s), js_data['data']['title'] - - -def get_moment(url, user_id, base_url, video_list): - """Recursively obtaining a video list""" - video_list_data = json.loads(get_content(url, headers=headers)) - if not video_list_data['next']['max_behot_time']: - return video_list - [video_list.append(i["display_url"]) for i in video_list_data["data"]] - max_behot_time = video_list_data['next']['max_behot_time'] - _param = { - 'user_id': user_id, - 'base_url': base_url, - 'video_list': video_list, - 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), - } - return get_moment(**_param) - - -def ixigua_download(url, output_dir='.', info_only=False, **kwargs): - """ Download a single video - Sample URL: https://www.ixigua.com/a6487187567887254029/#mid=59051127876 - """ - try: - video_page_id = re.findall('(\d+)', [i for i in url.split('/') if i][3])[0] if 'toutiao.com' in url \ - else re.findall('(\d+)', [i for i in url.split('/') if i][2])[0] - - video_start_info_url = r'https://m.ixigua.com/i{}/info/'.format(video_page_id) - video_info_url, title = get_s(get_content(video_start_info_url, headers=headers or kwargs.get('headers', {}))) - video_info = json.loads(get_content(video_info_url, headers=headers or kwargs.get('headers', {}))) - except Exception: - raise NotImplementedError(url) - try: - video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"]).decode() - except Exception: - raise NotImplementedError(url) - filetype, ext, size = url_info(video_url, headers=headers or kwargs.get('headers', {})) - print_info(site_info, title, filetype, size) - if not info_only: - _param = { - 'output_dir': output_dir, - 'headers': headers or kwargs.get('headers', {}) - } - download_urls([video_url], title, ext, size, **_param) - - -def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): - """Download all video from the user's video list - Sample URL: https://www.ixigua.com/c/user/71141690831/ - """ - if 'user' not in url: - raise NotImplementedError(url) - user_id = url.split('/')[-2] - max_behot_time = 0 - if not user_id: - raise NotImplementedError(url) - base_url = "https://www.ixigua.com/c/user/article/?user_id={user_id}" \ - "&max_behot_time={max_behot_time}&max_repin_time=0&count=20&page_type=0" - _param = { - 'user_id': user_id, - 'base_url': base_url, - 'video_list': [], - 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), - } - for i in get_moment(**_param): - ixigua_download(i, output_dir, info_only, **kwargs) +def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + return toutiao_download(url.replace('ixigua', '365yg')) site_info = "ixigua.com" download = ixigua_download -download_playlist = ixigua_download_playlist +download_playlist = toutiao_download_playlist \ No newline at end of file From 351173ba797ad1ebc830ed1de223f48c3570248e Mon Sep 17 00:00:00 2001 From: yangxiaochen Date: Thu, 10 May 2018 20:32:59 +0800 Subject: [PATCH 033/271] [qq] fix some error cases("check vid&filename failed" and "format invalid") --- src/you_get/extractors/qq.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 915f1b4b..15116b0c 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -18,11 +18,14 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): host = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] streams = video_json['fl']['fi'] seg_cnt = video_json['vl']['vi'][0]['cl']['fc'] + filename = video_json['vl']['vi'][0]['fn'] if seg_cnt == 0: seg_cnt = 1 + else: + fn_pre, magic_str, video_type = filename.split('.') best_quality = streams[-1]['name'] - part_format_id = streams[-1]['id'] + #part_format_id = streams[-1]['id'] part_urls= [] total_size = 0 @@ -31,7 +34,17 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): # filename = fn_pre + '.mp4' #else: # filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' - filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' + #filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' + + # fix some error cases("check vid&filename failed" and "format invalid") + # https://v.qq.com/x/page/q06058th9ll.html + # https://v.qq.com/x/page/t060789a21e.html + if seg_cnt == 1: + part_format_id = video_json['vl']['vi'][0]['cl']['keyid'].split('.')[-1] + else: + part_format_id = video_json['vl']['vi'][0]['cl']['ci'][part - 1]['keyid'].split('.')[1] + filename = '.'.join([fn_pre, magic_str, str(part), video_type]) + key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename) part_info = get_content(key_api) key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1]) From 24578efa1e34c24c7828d82cc27c70b478e6740a Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Fri, 11 May 2018 12:01:31 +0800 Subject: [PATCH 034/271] repair douyutv 403 error --- src/you_get/extractors/douyutv.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index b7b15e74..72a41a0a 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -9,6 +9,10 @@ import hashlib import time import re +headers = { + 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' + } + def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **kwargs): ep = 'http://vmobile.douyu.com/video/getInfo?vid=' patt = r'show/([0-9A-Za-z]+)' @@ -19,7 +23,7 @@ def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **k log.wtf('Unknown url pattern') vid = hit.group(1) - page = get_content(url) + page = get_content(url, headers=headers) hit = re.search(title_patt, page) if hit is None: title = vid @@ -35,21 +39,18 @@ def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **k urls = general_m3u8_extractor(m3u8_url) download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) -def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + +def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if 'v.douyu.com/show/' in url: douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) return - headers = { - 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' - } - - url = re.sub(r'[w.]*douyu.com','m.douyu.com',url) + url = re.sub(r'[w.]*douyu.com', 'm.douyu.com', url) html = get_content(url, headers) room_id_patt = r'room_id\s*:\s*(\d+),' room_id = match1(html, room_id_patt) if room_id == "0": - room_id = url[url.rfind('/')+1:] + room_id = url[url.rfind('/') + 1:] api_url = "http://www.douyutv.com/api/v1/" args = "room/%s?aid=wp&client_sys=wp&time=%d" % (room_id, int(time.time())) @@ -60,7 +61,7 @@ def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **k content = get_content(json_request_url, headers) json_content = json.loads(content) data = json_content['data'] - server_status = json_content.get('error',0) + server_status = json_content.get('error', 0) if server_status is not 0: raise ValueError("Server returned error:%s" % server_status) @@ -73,7 +74,8 @@ def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **k print_info(site_info, title, 'flv', float('inf')) if not info_only: - download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir = output_dir, merge = merge) + download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) + site_info = "douyu.com" download = douyutv_download From 50bba5527b52121a1f41b75d763a38fe2432e73e Mon Sep 17 00:00:00 2001 From: yangxiaochen Date: Fri, 11 May 2018 17:12:01 +0800 Subject: [PATCH 035/271] [douyin] send the request without fake headers, the douyin website will return fake body or the 403 response! --- src/you_get/extractors/douyin.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/douyin.py b/src/you_get/extractors/douyin.py index 213837e7..20ce0e26 100644 --- a/src/you_get/extractors/douyin.py +++ b/src/you_get/extractors/douyin.py @@ -7,6 +7,7 @@ from ..common import ( url_size, print_info, get_content, + fake_headers, download_urls, playlist_not_supported, ) @@ -16,13 +17,13 @@ __all__ = ['douyin_download_by_url'] def douyin_download_by_url(url, **kwargs): - page_content = get_content(url) + page_content = get_content(url, headers=fake_headers) match_rule = re.compile(r'var data = \[(.*?)\];') video_info = json.loads(match_rule.findall(page_content)[0]) video_url = video_info['video']['play_addr']['url_list'][0] title = video_info['cha_list'][0]['cha_name'] video_format = 'mp4' - size = url_size(video_url) + size = url_size(video_url, faker=True) print_info( site_info='douyin.com', title=title, type=video_format, size=size @@ -30,6 +31,7 @@ def douyin_download_by_url(url, **kwargs): if not kwargs['info_only']: download_urls( urls=[video_url], title=title, ext=video_format, total_size=size, + faker=True, **kwargs ) From daf630e9d782c53878b77b33a891d8003e747a72 Mon Sep 17 00:00:00 2001 From: yangxiaochen Date: Fri, 11 May 2018 18:08:23 +0800 Subject: [PATCH 036/271] [douyin] fix if there is not title, you-get cannot works well https://www.douyin.com/share/video/6553248251821165832 --- src/you_get/extractors/douyin.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/douyin.py b/src/you_get/extractors/douyin.py index 20ce0e26..e39775f4 100644 --- a/src/you_get/extractors/douyin.py +++ b/src/you_get/extractors/douyin.py @@ -21,7 +21,13 @@ def douyin_download_by_url(url, **kwargs): match_rule = re.compile(r'var data = \[(.*?)\];') video_info = json.loads(match_rule.findall(page_content)[0]) video_url = video_info['video']['play_addr']['url_list'][0] - title = video_info['cha_list'][0]['cha_name'] + # fix: https://www.douyin.com/share/video/6553248251821165832 + # if there is no title, use desc + cha_list = video_info['cha_list'] + if cha_list: + title = cha_list[0]['cha_name'] + else: + title = video_info['desc'] video_format = 'mp4' size = url_size(video_url, faker=True) print_info( From 9b03331589e645d76b28fd9021b6d17426186695 Mon Sep 17 00:00:00 2001 From: Zheng Luo Date: Sat, 12 May 2018 20:08:13 -0400 Subject: [PATCH 037/271] Update cccode to 0510 for youku.py --- src/you_get/extractors/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index fc1a5cd2..bfdb014f 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -78,7 +78,7 @@ class Youku(VideoExtractor): self.api_error_code = None self.api_error_msg = None - self.ccode = '0502' + self.ccode = '0510' # Found in http://g.alicdn.com/player/ykplayer/0.5.28/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' From 9ae300029f5de925be4e1de304e2809ec694d668 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 15 May 2018 21:22:51 +0200 Subject: [PATCH 038/271] version 0.4.1077 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 4d91c55d..64ef890f 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1060' +__version__ = '0.4.1077' From f7179968f8147e304fb61b26e381f28d4da07dff Mon Sep 17 00:00:00 2001 From: cclauss Date: Fri, 18 May 2018 09:08:13 +0200 Subject: [PATCH 039/271] Add flake8 to the testing (again) Another attempt at #2145 --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 2d780e81..ed1531b9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,8 @@ python: - "3.6" - "nightly" - "pypy3" +before_install: pip install flake8 +before_script: flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics script: make test sudo: false notifications: From 59f544665ff89a270c7c1e11f90f423c7690929c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 20 May 2018 13:00:12 +0200 Subject: [PATCH 040/271] [ixigua] remove undefined name (#2599) --- src/you_get/extractors/ixigua.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index bc19b1d0..59133442 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -1,5 +1,6 @@ #!/usr/bin/env python -__all__ = ['ixigua_download', 'ixigua_download_playlist'] +__all__ = ['ixigua_download'] + from .toutiao import download as toutiao_download from .toutiao import download_playlist as toutiao_download_playlist @@ -10,4 +11,4 @@ def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): site_info = "ixigua.com" download = ixigua_download -download_playlist = toutiao_download_playlist \ No newline at end of file +download_playlist = toutiao_download_playlist From 582d89e2f268ab1a72d6b065694760097c270702 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 20 May 2018 13:47:28 +0200 Subject: [PATCH 041/271] .travis.yml: skip flake8 on python 3.2 --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ed1531b9..9df327b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,8 @@ python: - "nightly" - "pypy3" before_install: pip install flake8 -before_script: flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics +before_script: + - if [[ $TRAVIS_PYTHON_VERSION != '3.2'* ]]; then flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics; fi script: make test sudo: false notifications: From 25b1c25517fbfb71a9e997edb4dad991249da6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E6=9E=97=E6=B3=BD?= <7954178+hanlz@users.noreply.github.com> Date: Mon, 21 May 2018 17:40:56 +0800 Subject: [PATCH 042/271] Move the warning message output to standard error. `ffmpeg -version` gives "ffmpeg version 2.8.14-0ubuntu0.16.04.1 Copyright (c) 2000-2018 the FFmpeg developers" on Ubuntu Xenial, which make int() failed during extracting version code. --- src/you_get/processor/ffmpeg.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 89d53e50..1e3bd7eb 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -3,6 +3,7 @@ import logging import os.path import subprocess +import sys from ..util.strings import parameterize from ..common import print_more_compatible as print @@ -25,8 +26,8 @@ def get_usable_ffmpeg(cmd): try: version = [int(i) for i in vers[2].split('.')] except: - print('It seems that your ffmpeg is a nightly build.') - print('Please switch to the latest stable if merging failed.') + print('It seems that your ffmpeg is a nightly build.', file=sys.stderr) + print('Please switch to the latest stable if merging failed.', file=sys.stderr) version = [1, 0] return cmd, 'ffprobe', version except: From d26482b9a92ccaaecd683dc4fb5f17a6519ce417 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 May 2018 22:59:51 +0200 Subject: [PATCH 043/271] [twitter] prevent mobile redirection --- src/you_get/extractors/twitter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 894439aa..9cc3c5c7 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -19,7 +19,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) url = 'https://' + match1(url, r'//mobile\.(.+)') if re.match(r'https?://twitter\.com/i/moments/', url): # moments - html = get_html(url) + html = get_html(url, faker=True) paths = re.findall(r'data-permalink-path="([^"]+)"', html) for path in paths: twitter_download('https://twitter.com' + path, @@ -29,7 +29,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) **kwargs) return - html = get_html(url) + html = get_html(url, faker=True) screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ r1(r' Date: Sun, 27 May 2018 19:16:31 +0200 Subject: [PATCH 044/271] [instagram] download video_url --- src/you_get/extractors/instagram.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index e06eba00..332d9b61 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -27,6 +27,8 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg for edge in edges: title = edge['node']['shortcode'] image_url = edge['node']['display_url'] + if 'video_url' in edge['node']: + image_url = edge['node']['video_url'] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) @@ -39,6 +41,8 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg else: title = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['shortcode'] image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url'] + if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']: + image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) From b7603a4db0cbe1578f471da946cfa07c2de01736 Mon Sep 17 00:00:00 2001 From: "Agent Fitz ;-)" Date: Tue, 29 May 2018 21:36:29 +0800 Subject: [PATCH 045/271] =?UTF-8?q?=E8=85=BE=E8=AE=AF=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E9=BB=98=E8=AE=A4=E4=B8=8B=E8=BD=BDSHD=E8=A7=86=E9=A2=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 现在可以直接下载720P(SHD)的腾讯视频 --- src/you_get/extractors/qq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 15116b0c..4a67c57c 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -9,7 +9,7 @@ from .qie_video import download_by_url as qie_video_download from urllib.parse import urlparse,parse_qs def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): - info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&vid={}'.format(vid) + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&defn=shd&vid={}'.format(vid) info = get_content(info_api) video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) From 3653420fe9965df30e63e7ed0495d58fa4538195 Mon Sep 17 00:00:00 2001 From: kiss4u Date: Sat, 2 Jun 2018 23:15:44 +0800 Subject: [PATCH 046/271] fix some url format from v.qq.com https://v.qq.com/x/page/w0674l9yrrh.html http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr --- src/you_get/extractors/acfun.py | 2 +- src/you_get/extractors/bilibili.py | 2 +- src/you_get/extractors/qq.py | 29 +++++++++++++++++++++++------ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index c521422f..4b45c5e9 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -65,7 +65,7 @@ def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=Fals elif sourceType == 'tudou': tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'qq': - qq_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'letv': letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'zhuzhan': diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 916782af..9e2b8bc0 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -169,7 +169,7 @@ class Bilibili(VideoExtractor): tc_flashvars = tc_flashvars.group(1) if tc_flashvars is not None: self.out = True - qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) + qq_download_by_vid(tc_flashvars, self.title, True, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) return has_plist = re.search(r'"page":2', self.page) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 15116b0c..60fb751f 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -8,8 +8,14 @@ from .qie import download as qieDownload from .qie_video import download_by_url as qie_video_download from urllib.parse import urlparse,parse_qs -def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): - info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&vid={}'.format(vid) +def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, info_only=False): + + if default_from: + platform = 11 + else: + platform = 4100201 + + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&vid={}'.format(platform, vid) info = get_content(info_api) video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) @@ -17,7 +23,8 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): title = video_json['vl']['vi'][0]['ti'] host = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] streams = video_json['fl']['fi'] - seg_cnt = video_json['vl']['vi'][0]['cl']['fc'] + seg_cnt = fc_cnt = video_json['vl']['vi'][0]['cl']['fc'] + filename = video_json['vl']['vi'][0]['fn'] if seg_cnt == 0: seg_cnt = 1 @@ -39,7 +46,10 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): # fix some error cases("check vid&filename failed" and "format invalid") # https://v.qq.com/x/page/q06058th9ll.html # https://v.qq.com/x/page/t060789a21e.html - if seg_cnt == 1: + + if fc_cnt == 0: + # fix jason error + # https://v.qq.com/x/page/w0674l9yrrh.html part_format_id = video_json['vl']['vi'][0]['cl']['keyid'].split('.')[-1] else: part_format_id = video_json['vl']['vi'][0]['cl']['ci'][part - 1]['keyid'].split('.')[1] @@ -112,6 +122,8 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption= def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """""" + default_from = True + if re.match(r'https?://egame.qq.com/live\?anchorid=(\d+)', url): from . import qq_egame qq_egame.qq_egame_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) @@ -134,7 +146,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): content = get_content(url) vids = matchall(content, [r'\?vid=(\w+)']) for vid in vids: - qq_download_by_vid(vid, vid, output_dir, merge, info_only) + qq_download_by_vid(vid, vid, default_from, output_dir, merge, info_only) return if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): @@ -165,7 +177,12 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): title = match1(content, r'"title":"([^"]+)"') if not title else title title = vid if not title else title #general fallback - qq_download_by_vid(vid, title, output_dir, merge, info_only) + if 'v.sports.qq.com' in url: + # fix url forbidden + # http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr + default_from = False + + qq_download_by_vid(vid, title, default_from, output_dir, merge, info_only) site_info = "QQ.com" download = qq_download From 928c8ccbe356800e582c8f0d60901da555ef7631 Mon Sep 17 00:00:00 2001 From: kiss4u Date: Sun, 3 Jun 2018 09:10:46 +0800 Subject: [PATCH 047/271] modify comments --- src/you_get/extractors/qq.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 60fb751f..c3c653a8 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -13,6 +13,7 @@ def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, inf if default_from: platform = 11 else: + # fix return {,"msg":"cannot play outside"} platform = 4100201 info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&vid={}'.format(platform, vid) @@ -32,24 +33,13 @@ def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, inf fn_pre, magic_str, video_type = filename.split('.') best_quality = streams[-1]['name'] - #part_format_id = streams[-1]['id'] part_urls= [] total_size = 0 for part in range(1, seg_cnt+1): - #if seg_cnt == 1 and video_json['vl']['vi'][0]['vh'] <= 480: - # filename = fn_pre + '.mp4' - #else: - # filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' - #filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' - - # fix some error cases("check vid&filename failed" and "format invalid") - # https://v.qq.com/x/page/q06058th9ll.html - # https://v.qq.com/x/page/t060789a21e.html - if fc_cnt == 0: - # fix jason error - # https://v.qq.com/x/page/w0674l9yrrh.html + # fix json parsing error + # example:https://v.qq.com/x/page/w0674l9yrrh.html part_format_id = video_json['vl']['vi'][0]['cl']['keyid'].split('.')[-1] else: part_format_id = video_json['vl']['vi'][0]['cl']['ci'][part - 1]['keyid'].split('.')[1] @@ -179,7 +169,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if 'v.sports.qq.com' in url: # fix url forbidden - # http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr + # example:http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr default_from = False qq_download_by_vid(vid, title, default_from, output_dir, merge, info_only) From a10c98c5ab9aa5dc37e0c1baa652a9f456b2ca25 Mon Sep 17 00:00:00 2001 From: kiss4u Date: Mon, 4 Jun 2018 00:43:15 +0800 Subject: [PATCH 048/271] support for zhibo.tv example: http://v.zhibo.tv/31609372 example: http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html --- README.md | 1 + src/you_get/common.py | 7 ++-- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/zhibo.py | 52 ++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 src/you_get/extractors/zhibo.py diff --git a/README.md b/README.md index 86c5e4e9..f6f8efdc 100644 --- a/README.md +++ b/README.md @@ -416,6 +416,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 西瓜视频 | |✓| | | | 快手 | |✓|✓| | | 抖音 | |✓| | | +| 中国体育(TV) | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. diff --git a/src/you_get/common.py b/src/you_get/common.py index e3000854..30e533f1 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -24,6 +24,7 @@ sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8') SITES = { '163' : 'netease', '56' : 'w56', + '365yg' : 'toutiao', 'acfun' : 'acfun', 'archive' : 'archive', 'baidu' : 'baidu', @@ -64,6 +65,7 @@ SITES = { 'iqiyi' : 'iqiyi', 'ixigua' : 'ixigua', 'isuntv' : 'suntv', + 'iwara' : 'iwara', 'joy' : 'joy', 'kankanews' : 'bilibili', 'khanacademy' : 'khan', @@ -82,6 +84,7 @@ SITES = { 'mixcloud' : 'mixcloud', 'mtv81' : 'mtv81', 'musicplayon' : 'musicplayon', + 'miaopai' : 'yixia', 'naver' : 'naver', '7gogo' : 'nanagogo', 'nicovideo' : 'nicovideo', @@ -118,14 +121,12 @@ SITES = { 'xiaojiadianvideo' : 'fc2video', 'ximalaya' : 'ximalaya', 'yinyuetai' : 'yinyuetai', - 'miaopai' : 'yixia', 'yizhibo' : 'yizhibo', 'youku' : 'youku', - 'iwara' : 'iwara', 'youtu' : 'youtube', 'youtube' : 'youtube', 'zhanqi' : 'zhanqi', - '365yg' : 'toutiao', + 'zhibo' : 'zhibo', } dry_run = False diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index ec9e86ae..649a911f 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -88,3 +88,4 @@ from .ted import * from .khan import * from .zhanqi import * from .kuaishou import * +from .zhibo import * \ No newline at end of file diff --git a/src/you_get/extractors/zhibo.py b/src/you_get/extractors/zhibo.py new file mode 100644 index 00000000..4aaa293e --- /dev/null +++ b/src/you_get/extractors/zhibo.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +__all__ = ['zhibo_download'] + +from ..common import * + +def zhibo_vedio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + # http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html + + html = get_html(url) + title = r1(r'([\s\S]*)', html) + total_size = 0 + part_urls= [] + + video_html = r1(r'', html) + + # video_guessulike = r1(r"window.xgData =([s\S'\s\.]*)\'\;[\s\S]*window.vouchData", video_html) + video_url = r1(r"window.vurl = \'([s\S'\s\.]*)\'\;[\s\S]*window.imgurl", video_html) + part_urls.append(video_url) + ext = video_url.split('.')[-1] + + print_info(site_info, title, ext, total_size) + if not info_only: + download_urls(part_urls, title, ext, total_size, output_dir=output_dir, merge=merge) + + +def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + if 'video.zhibo.tv' in url: + zhibo_vedio_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) + return + + # if 'v.zhibo.tv' in url: + # http://v.zhibo.tv/31609372 + html = get_html(url) + title = r1(r'([\s\S]*)', html) + is_live = r1(r"window.videoIsLive=\'([s\S'\s\.]*)\'\;[\s\S]*window.resDomain", html) + if is_live is not "1": + raise ValueError("The live stream is not online! (Errno:%s)" % is_live) + + ourStreamName = r1(r"window.ourStreamName=\'([s\S'\s\.]*)\'\;[\s\S]*window.rtmpDefaultSource", html) + rtmpPollUrl = r1(r"window.rtmpPollUrl=\'([s\S'\s\.]*)\'\;[\s\S]*window.hlsDefaultSource", html) + + #real_url = 'rtmp://220.194.213.56/live.zhibo.tv/8live/' + ourStreamName + real_url = rtmpPollUrl + ourStreamName + + print_info(site_info, title, 'flv', float('inf')) + if not info_only: + download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) + +site_info = "zhibo.tv" +download = zhibo_download +download_playlist = playlist_not_supported('zhibo') From ebbe13e88e78e2f6eff80a495ad5a90580391d49 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 23 Jun 2018 00:29:00 +0200 Subject: [PATCH 049/271] [universal] a URL with space is not a good URL --- src/you_get/extractors/universal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 6a1c2d30..57b9b2d1 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -67,12 +67,12 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg urls = [] for i in media_exts: - urls += re.findall(r'(https?://[^;"\'\\]+' + i + r'[^;"\'\\]*)', page) + urls += re.findall(r'(https?://[^ ;"\'\\]+' + i + r'[^ ;"\'\\]*)', page) p_urls = re.findall(r'(https?%3A%2F%2F[^;&]+' + i + r'[^;&]*)', page) urls += [parse.unquote(url) for url in p_urls] - q_urls = re.findall(r'(https?:\\\\/\\\\/[^;"\']+' + i + r'[^;"\']*)', page) + q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\']+' + i + r'[^ ;"\']*)', page) urls += [url.replace('\\\\/', '/') for url in q_urls] # a link href to an image is often an interesting one From 52e6a7482d99bc5d928f81e27309ff964c17c7a7 Mon Sep 17 00:00:00 2001 From: Justlearnm0re Date: Sun, 24 Jun 2018 17:41:58 +0800 Subject: [PATCH 050/271] fix cid match The old regex is broken, fix it with new one. --- src/you_get/extractors/bilibili.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 9e2b8bc0..523abbdb 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -177,7 +177,9 @@ class Bilibili(VideoExtractor): log.w('This page contains a playlist. (use --playlist to download all videos.)') try: - cid = re.search(r'cid=(\d+)', self.page).group(1) + page_list = json.loads(re.search(r'"pages":(\[.*?\])', self.page).group(1)) + index_id = int(re.search(r'index_(\d+)', self.url).group(1)) + cid = page_list[index_id-1]['cid'] # change cid match rule except: cid = re.search(r'"cid":(\d+)', self.page).group(1) if cid is not None: From 1adb799c869c50ae19c643db9091912dbb68bac1 Mon Sep 17 00:00:00 2001 From: Justsoos Date: Mon, 25 Jun 2018 16:55:13 +0800 Subject: [PATCH 051/271] Update douyutv.py douyu update --- src/you_get/extractors/douyutv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index 72a41a0a..bdcea458 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -45,9 +45,9 @@ def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs) douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) return - url = re.sub(r'[w.]*douyu.com', 'm.douyu.com', url) + url = re.sub(r'.*douyu.com','https://m.douyu.com/room', url) html = get_content(url, headers) - room_id_patt = r'room_id\s*:\s*(\d+),' + room_id_patt = r'"rid"\s*:\s*(\d+),' room_id = match1(html, room_id_patt) if room_id == "0": room_id = url[url.rfind('/') + 1:] From 4f00ca5b8da7dcaf51a3d5f91e168c53f7efd156 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 22:18:27 +0200 Subject: [PATCH 052/271] [bilibili] warn when target URL is a playlist and --playlist is not used --- src/you_get/extractors/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 523abbdb..cd71b071 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -173,7 +173,7 @@ class Bilibili(VideoExtractor): return has_plist = re.search(r'"page":2', self.page) - if has_plist: + if has_plist and not kwargs.get('playlist'): log.w('This page contains a playlist. (use --playlist to download all videos.)') try: @@ -341,6 +341,7 @@ def parse_cid_playurl(xml): def bilibili_download_playlist_by_url(url, **kwargs): url = url_locations([url])[0] + kwargs['playlist'] = True # a bangumi here? possible? if 'live.bilibili' in url: site.download_by_url(url) From 93c08277d20b373d270b488bf1e11d20dda7e17e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 22:50:10 +0200 Subject: [PATCH 053/271] [bilibili] better subtitle with page no --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index cd71b071..93749596 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -360,7 +360,7 @@ def bilibili_download_playlist_by_url(url, **kwargs): page_cnt = len(page_list) for no in range(1, page_cnt+1): page_url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, no) - subtitle = page_list[no-1]['pagename'] + subtitle = '#%s. %s'% (page_list[no-1]['page'], page_list[no-1]['pagename']) Bilibili().download_by_url(page_url, subtitle=subtitle, **kwargs) site = Bilibili() From 1171be87c7dc2d2ae66c1c37d02ae9723fe414e5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 23:21:42 +0200 Subject: [PATCH 054/271] [baidu] squanch this --- src/you_get/extractors/baidu.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 6f558e31..d0146217 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,6 +129,15 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) + vhsrc = re.findall(r'vhsrc="([^"]+)"', html) + if vhsrc is not None: + ext = 'mp4' + size = url_size(vhsrc[0]) + print_info(site_info, title, ext, size) + if not info_only: + download_urls(vhsrc, title, ext, size, + output_dir=output_dir, merge=False) + items = re.findall( r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i From 18af8f36e2e9a12761ae582527818b8af3c3e891 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 27 Jun 2018 23:35:15 +0200 Subject: [PATCH 055/271] version 0.4.1099 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 64ef890f..9f3287d9 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1077' +__version__ = '0.4.1099' From 4a3f1b5bd2c87d43667730578df12ac121d11322 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 29 Jun 2018 15:04:42 +0200 Subject: [PATCH 056/271] [miaopai] squanch that --- src/you_get/extractors/yixia.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py index ccaaf546..ff45730d 100644 --- a/src/you_get/extractors/yixia.py +++ b/src/you_get/extractors/yixia.py @@ -51,10 +51,10 @@ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwa yixia_download_by_scid = yixia_miaopai_download_by_scid site_info = "Yixia Miaopai" - scid = match1(url, r'miaopai\.com/show/channel/(.+)\.htm') or \ - match1(url, r'miaopai\.com/show/(.+)\.htm') or \ - match1(url, r'm\.miaopai\.com/show/channel/(.+)\.htm') or \ - match1(url, r'm\.miaopai\.com/show/channel/(.+)') + scid = match1(url, r'miaopai\.com/show/channel/([^.]+)\.htm') or \ + match1(url, r'miaopai\.com/show/([^.]+)\.htm') or \ + match1(url, r'm\.miaopai\.com/show/channel/([^.]+)\.htm') or \ + match1(url, r'm\.miaopai\.com/show/channel/([^.]+)') elif 'xiaokaxiu.com' in hostname: #Xiaokaxiu yixia_download_by_scid = yixia_xiaokaxiu_download_by_scid From 503ff846f77225ee373f58c016da6428d8d5a2b7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 13:23:48 +0200 Subject: [PATCH 057/271] [common] do not coerce headers into a dict --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 30e533f1..73192e61 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -505,7 +505,7 @@ def get_head(url, headers={}, get_method='HEAD'): req = request.Request(url) req.get_method = lambda: get_method res = urlopen_with_retry(req) - return dict(res.headers) + return res.headers def url_info(url, faker=False, headers={}): From 37e2a798972d2ad7d73abfc86ab7c54a2f42a077 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 13:48:22 +0200 Subject: [PATCH 058/271] [common] wubba lubba dub dub --- src/you_get/common.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 73192e61..f4c37f71 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1594,15 +1594,8 @@ def url_to_module(url): url ) else: - import http.client - video_host = r1(r'https?://([^/]+)/', url) # .cn could be removed - if url.startswith('https://'): - conn = http.client.HTTPSConnection(video_host) - else: - conn = http.client.HTTPConnection(video_host) - conn.request('HEAD', video_url, headers=fake_headers) - res = conn.getresponse() - location = res.getheader('location') + location = get_location(url) + if location and location != url and not location.startswith('/'): return url_to_module(location) else: From 3e8927959836b96982a06fe922a0946cfca52ed2 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 15:47:54 +0200 Subject: [PATCH 059/271] [common] make get_location accept headers --- src/you_get/common.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index f4c37f71..c8b0f80b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -369,13 +369,16 @@ def get_decoded_html(url, faker=False): return data -def get_location(url): +def get_location(url, headers=None, get_method='HEAD'): logging.debug('get_location: %s' % url) - response = request.urlopen(url) - # urllib will follow redirections and it's too much code to tell urllib - # not to do that - return response.geturl() + if headers: + req = request.Request(url, headers=headers) + else: + req = request.Request(url) + req.get_method = lambda: get_method + res = urlopen_with_retry(req) + return res.geturl() def urlopen_with_retry(*args, **kwargs): @@ -1594,7 +1597,10 @@ def url_to_module(url): url ) else: - location = get_location(url) + try: + location = get_location(url) # t.co isn't happy with fake_headers + except: + location = get_location(url, headers=fake_headers) if location and location != url and not location.startswith('/'): return url_to_module(location) From d503237508167e761f835045d36cdec0a928d31f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 15:48:22 +0200 Subject: [PATCH 060/271] [common] dumb --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index c8b0f80b..97bc93a5 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -499,7 +499,7 @@ def urls_size(urls, faker=False, headers={}): return sum([url_size(url, faker=faker, headers=headers) for url in urls]) -def get_head(url, headers={}, get_method='HEAD'): +def get_head(url, headers=None, get_method='HEAD'): logging.debug('get_head: %s' % url) if headers: From 9ab4bfbf3e4ef99bd67c2bcba4eaa4398edc1cbd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 1 Jul 2018 15:50:08 +0200 Subject: [PATCH 061/271] [common] update UA --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 97bc93a5..b19d602f 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -143,7 +143,7 @@ fake_headers = { 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0', # noqa + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0', # noqa } if sys.stdout.isatty(): From e635628639cc6f709dbc84dd8e45c3bdcef7758a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 3 Jul 2018 14:49:51 +0200 Subject: [PATCH 062/271] [instagram] no more ? --- src/you_get/extractors/instagram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 332d9b61..65fc01f5 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -29,6 +29,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = edge['node']['display_url'] if 'video_url' in edge['node']: image_url = edge['node']['video_url'] + image_url = image_url.split('?')[0] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) @@ -43,6 +44,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url'] if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']: image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'] + image_url = image_url.split('?')[0] ext = image_url.split('.')[-1] size = int(get_head(image_url)['Content-Length']) print_info(site_info, title, ext, size) From 196e94bdfff870e795b6aab5618c33166378fe4a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 12 Jul 2018 01:03:01 +0200 Subject: [PATCH 063/271] [baidu] because the fleeb has all of the fleeb juice --- src/you_get/extractors/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index d0146217..c9d64547 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,7 +129,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) - vhsrc = re.findall(r'vhsrc="([^"]+)"', html) + vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) if vhsrc is not None: ext = 'mp4' size = url_size(vhsrc[0]) From a07ba1a5dfc8f0a46b957426a1dc528a90c589b8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 14 Jul 2018 14:54:21 +0200 Subject: [PATCH 064/271] [baidu] it's important that the fleeb is rubbed --- src/you_get/extractors/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index c9d64547..1392e7de 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,7 +129,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) - vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) + vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) if vhsrc is not None: ext = 'mp4' size = url_size(vhsrc[0]) From 50216593e439b6e940e868a9f98c4475ee3636f5 Mon Sep 17 00:00:00 2001 From: Fangzhou Li Date: Mon, 16 Jul 2018 04:22:13 +0800 Subject: [PATCH 065/271] [util] improve compatibility with WSL --- src/you_get/util/fs.py | 8 ++++---- src/you_get/util/os.py | 30 ++++++++++++++++++++++++++++++ tests/test_util.py | 7 ++++--- 3 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 src/you_get/util/os.py diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index d49a117d..b6b7069a 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -import platform +from .os import detect_os -def legitimize(text, os=platform.system()): +def legitimize(text, os=detect_os()): """Converts a string to a valid filename. """ @@ -13,7 +13,7 @@ def legitimize(text, os=platform.system()): ord('|'): '-', }) - if os == 'Windows': + if os == 'windows' or os == 'cygwin' or os == 'wsl': # Windows (non-POSIX namespace) text = text.translate({ # Reserved in Windows VFAT and NTFS @@ -31,7 +31,7 @@ def legitimize(text, os=platform.system()): }) else: # *nix - if os == 'Darwin': + if os == 'mac': # Mac OS HFS+ text = text.translate({ ord(':'): '-', diff --git a/src/you_get/util/os.py b/src/you_get/util/os.py new file mode 100644 index 00000000..11730e28 --- /dev/null +++ b/src/you_get/util/os.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +from platform import system + +def detect_os(): + """Detect operating system. + """ + + # Inspired by: + # https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py + + syst = system().lower() + os = 'unknown' + + if 'cygwin' in syst: + os = 'cygwin' + elif 'darwin' in syst: + os = 'mac' + elif 'linux' in syst: + os = 'linux' + # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423 + with open('/proc/version', 'r') as f: + if 'microsoft' in f.read().lower(): + os = 'wsl' + elif 'windows' in syst: + os = 'windows' + elif 'bsd' in syst: + os = 'bsd' + + return os diff --git a/tests/test_util.py b/tests/test_util.py index 239083bc..88743b03 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -6,6 +6,7 @@ from you_get.util.fs import * class TestUtil(unittest.TestCase): def test_legitimize(self): - self.assertEqual(legitimize("1*2", os="Linux"), "1*2") - self.assertEqual(legitimize("1*2", os="Darwin"), "1*2") - self.assertEqual(legitimize("1*2", os="Windows"), "1-2") + self.assertEqual(legitimize("1*2", os="linux"), "1*2") + self.assertEqual(legitimize("1*2", os="mac"), "1*2") + self.assertEqual(legitimize("1*2", os="windows"), "1-2") + self.assertEqual(legitimize("1*2", os="wsl"), "1-2") From ddf67aadb5f90ececd2246c7a6302a66b630eeac Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 17 Jul 2018 17:59:31 +0200 Subject: [PATCH 066/271] [baidu] squanch this, mofo --- src/you_get/extractors/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 1392e7de..b30c9d86 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -129,7 +129,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only= html = get_html(url) title = r1(r'title:"([^"]+)"', html) - vhsrc = re.findall(r'"BDE_Image" src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) + vhsrc = re.findall(r'"BDE_Image"[^>]+src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html) if vhsrc is not None: ext = 'mp4' size = url_size(vhsrc[0]) From 80aa34f538f52f60484609405d07fc62377827bc Mon Sep 17 00:00:00 2001 From: ellipse42 Date: Sat, 21 Jul 2018 06:26:19 +0800 Subject: [PATCH 067/271] [miaopai] support not fixed length fid --- src/you_get/extractors/miaopai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py index 6d9a79c7..f37d45b0 100644 --- a/src/you_get/extractors/miaopai.py +++ b/src/you_get/extractors/miaopai.py @@ -31,7 +31,7 @@ def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = Fa #---------------------------------------------------------------------- def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): - fid = match1(url, r'\?fid=(\d{4}:\w{32})') + fid = match1(url, r'\?fid=(\d{4}:\w+)') if fid is not None: miaopai_download_by_fid(fid, output_dir, merge, info_only) elif '/p/230444' in url: From feffd883ea30a4b8f6af9cbd3e631489e051919c Mon Sep 17 00:00:00 2001 From: hellsof Date: Sat, 28 Jul 2018 13:51:43 +0800 Subject: [PATCH 068/271] support view.inews.qq.com/a/20180521V0Z9MH00 https://kuaibao.qq.com/s/20180521V0Z9MH00 https://v.qq.com/x/cover/t0fqsm1y83r8v5j/a0026nvw5jr.html --- src/you_get/extractors/qq.py | 51 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 7f2f4acc..e39bf2e3 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -2,28 +2,25 @@ __all__ = ['qq_download'] -from ..common import * -from ..util.log import * from .qie import download as qieDownload from .qie_video import download_by_url as qie_video_download -from urllib.parse import urlparse,parse_qs +from ..common import * -def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, info_only=False): - if default_from: - platform = 11 - else: - # fix return {,"msg":"cannot play outside"} - platform = 4100201 - - info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&defn=shd&vid={}'.format(vid) - info = get_content(info_api) - video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) +def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): + # http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr https://v.qq.com/x/cover/t0fqsm1y83r8v5j/a0026nvw5jr.html + video_json = None + platforms = [4100201, 11] + for platform in platforms: + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&defn=shd&vid={}'.format(platform, vid) + info = get_content(info_api) + video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) + if not video_json.get('msg')=='cannot play outside': + break fn_pre = video_json['vl']['vi'][0]['lnk'] title = video_json['vl']['vi'][0]['ti'] host = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] - streams = video_json['fl']['fi'] seg_cnt = fc_cnt = video_json['vl']['vi'][0]['cl']['fc'] filename = video_json['vl']['vi'][0]['fn'] @@ -32,8 +29,6 @@ def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, inf else: fn_pre, magic_str, video_type = filename.split('.') - best_quality = streams[-1]['name'] - part_urls= [] total_size = 0 for part in range(1, seg_cnt+1): @@ -112,7 +107,6 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption= def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """""" - default_from = True if re.match(r'https?://egame.qq.com/live\?anchorid=(\d+)', url): from . import qq_egame @@ -136,10 +130,18 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): content = get_content(url) vids = matchall(content, [r'\?vid=(\w+)']) for vid in vids: - qq_download_by_vid(vid, vid, default_from, output_dir, merge, info_only) + qq_download_by_vid(vid, vid, output_dir, merge, info_only) return - if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): + if 'kuaibao.qq.com/s/' in url: + # https://kuaibao.qq.com/s/20180521V0Z9MH00 + nid = match1(url, r'/s/([^/&?#]+)') + content = get_content('https://kuaibao.qq.com/getVideoRelate?id=' + nid) + info_json = json.loads(content) + vid=info_json['videoinfo']['vid'] + title=info_json['videoinfo']['title'] + elif 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url): + # http://daxue.qq.com/content/content/id/2321 content = get_content(url) vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') title = match1(content, r'title">([^"]+)