From 439354e730d8b864de9401536c93220467ccb355 Mon Sep 17 00:00:00 2001 From: lcjh <120989324@qq.com> Date: Wed, 24 Mar 2021 17:48:59 +0000 Subject: [PATCH 01/26] add HDR support for bilibili --- src/you_get/extractors/bilibili.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index a812d72d..a696b398 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -12,6 +12,8 @@ class Bilibili(VideoExtractor): # Bilibili media encoding options, in descending quality order. stream_types = [ + {'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280, + 'container': 'FLV', 'video_resolution': '3840p', 'desc': '真彩 HDR'}, {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280, 'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'}, {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280, From 25204d8841cdbbcad4f5df357c80853f34286025 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 29 Mar 2021 16:38:54 +0200 Subject: [PATCH 02/26] [test] remove test_bilibili (videos deleted) --- tests/test.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/test.py b/tests/test.py index 0f7595b3..5a86ee8f 100644 --- a/tests/test.py +++ b/tests/test.py @@ -40,14 +40,6 @@ class YouGetTests(unittest.TestCase): def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) - def test_bilibili(self): - bilibili.download( - "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True - ) - bilibili.download( - "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True - ) - def test_soundcloud(self): ## single song soundcloud.download( From 17eff492fe1db0c378f2447b54c5f09ed98b2626 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 29 Mar 2021 16:44:07 +0200 Subject: [PATCH 03/26] version 0.4.1520 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py 
b/src/you_get/version.py index 95505814..60bff607 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1500' +__version__ = '0.4.1520' From ef9ff72183acd93b1b10b2b836d145447cceb016 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 2 Apr 2021 01:44:36 +0200 Subject: [PATCH 04/26] [bilibili] fix a bug for non-interactive multi-part videos (https://github.com/soimort/you-get/pull/2746#pullrequestreview-626492105) --- src/you_get/extractors/bilibili.py | 32 ++++++++++-------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index a696b398..644c5af4 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -181,7 +181,7 @@ class Bilibili(VideoExtractor): self.download_playlist_by_url(self.url, **kwargs) return - # regular av video + # regular video if sort == 'video': initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME initial_state = json.loads(initial_state_text) @@ -601,13 +601,21 @@ class Bilibili(VideoExtractor): log.e('[Error] Unsupported URL pattern.') exit(1) - # regular av video + # regular video if sort == 'video': initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME initial_state = json.loads(initial_state_text) aid = initial_state['videoData']['aid'] pn = initial_state['videoData']['videos'] - if pn!= len(initial_state['videoData']['pages']):#interaction video 互动视频 + + if pn == len(initial_state['videoData']['pages']): + # non-interactive video + for pi in range(1, pn + 1): + purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi) + self.__class__().download_by_url(purl, **kwargs) + + else: + # interactive video search_node_list = [] download_cid_set = set([initial_state['videoData']['cid']]) params = { @@ -658,24 +666,6 @@ class 
Bilibili(VideoExtractor): self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams] self.extract(**kwargs) self.download(**kwargs) - else: - playinfo_text = match1(html_content, r'__playinfo__=(.*?)', html) + data = re.search(r'window\._sharedData\s*=\s*(.*);', cont) try: info = json.loads(data.group(1)) post = info['entry_data']['PostPage'][0] assert post except: # with logged-in cookies - data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);', html) + data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);', cont) if data is not None: log.e('[Warning] Cookies needed.') post = json.loads(data.group(1)) From ad24e68baa5106522c6d4a37395896e9776ea88f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 11 Jul 2021 18:46:41 +0200 Subject: [PATCH 20/26] version 0.4.1536 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 70ca2ef5..dd603918 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1527' +__version__ = '0.4.1536' From f4ec55b00e5b8743727b138d06f6ffadadd49579 Mon Sep 17 00:00:00 2001 From: Tim Gates Date: Sat, 31 Jul 2021 09:34:46 +1000 Subject: [PATCH 21/26] docs: Fix a few typos There are small typos in: - src/you_get/extractors/flickr.py - src/you_get/extractors/mtv81.py - src/you_get/extractors/qingting.py Fixes: - Should read `several` rather than `serveral`. - Should read `channel` rather than `chaanel`. - Should read `approach` rather than `approch`. 
--- src/you_get/extractors/flickr.py | 2 +- src/you_get/extractors/mtv81.py | 2 +- src/you_get/extractors/qingting.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/flickr.py b/src/you_get/extractors/flickr.py index 2535dd1c..79fca4ff 100644 --- a/src/you_get/extractors/flickr.py +++ b/src/you_get/extractors/flickr.py @@ -73,7 +73,7 @@ def get_api_key(page): match = match1(page, pattern_inline_api_key) # this happens only when the url points to a gallery page # that contains no inline api_key(and never makes xhr api calls) - # in fact this might be a better approch for getting a temporary api key + # in fact this might be a better approach for getting a temporary api key # since there's no place for a user to add custom information that may # misguide the regex in the homepage if not match: diff --git a/src/you_get/extractors/mtv81.py b/src/you_get/extractors/mtv81.py index b92f74bc..ef432159 100644 --- a/src/you_get/extractors/mtv81.py +++ b/src/you_get/extractors/mtv81.py @@ -28,7 +28,7 @@ def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs): # # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf # - # because rtmpdump is unstable,may try serveral times + # because rtmpdump is unstable,may try several times # if not info_only: # import pdb diff --git a/src/you_get/extractors/qingting.py b/src/you_get/extractors/qingting.py index 9859d4be..8dd1b14f 100644 --- a/src/you_get/extractors/qingting.py +++ b/src/you_get/extractors/qingting.py @@ -10,7 +10,7 @@ __all__ = ['qingting_download_by_url'] class Qingting(VideoExtractor): # every resource is described by its channel id and program id - # so vid is tuple (chaanel_id, program_id) + # so vid is tuple (channel_id, 
program_id) name = 'Qingting' stream_types = [ From b97e9484430e2344d3c332d39880ba3ae68890e8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Aug 2021 05:54:36 +0200 Subject: [PATCH 22/26] [youtube] tr_js: support 3-char main function names --- src/you_get/extractors/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 58614c5f..81b45ac5 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -77,10 +77,11 @@ class YouTube(VideoExtractor): # - https://www.youtube.com/yts/jsbin/player-vflRjqq_w/da_DK/base.js # - https://www.youtube.com/yts/jsbin/player_ias-vfl-jbnrr/da_DK/base.js # - https://www.youtube.com/s/player/0b643cd1/player_ias.vflset/sv_SE/base.js + # - https://www.youtube.com/s/player/50e823fc/player_ias.vflset/sv_SE/base.js def tr_js(code): code = re.sub(r'function', r'def', code) # add prefix '_sig_' to prevent namespace pollution - code = re.sub(r'(\W)([$\w][$\w])\(', r'\1_sig_\2(', code) + code = re.sub(r'(\W)([$\w][$\w][$\w]?)\(', r'\1_sig_\2(', code) code = re.sub(r'\$', '_dollar', code) code = re.sub(r'\{', r': ', code) code = re.sub(r'\}', r'\n', code) From 09cd505311af7dff87d344436a7f4a87b1763cd1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Aug 2021 06:39:48 +0200 Subject: [PATCH 23/26] [test] disable test_soundcloud temporarily --- tests/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test.py b/tests/test.py index 5a86ee8f..1989455f 100644 --- a/tests/test.py +++ b/tests/test.py @@ -42,9 +42,9 @@ class YouGetTests(unittest.TestCase): def test_soundcloud(self): ## single song - soundcloud.download( - 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True - ) + #soundcloud.download( + # 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True + #) ## playlist #soundcloud.download( # 'https://soundcloud.com/anthony-flieger/sets/cytus', 
info_only=True From e1e1503b08c037ec0eb06c6c951240d004cf342b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Aug 2021 06:44:15 +0200 Subject: [PATCH 24/26] [test] disable test_soundcloud temporarily --- tests/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test.py b/tests/test.py index 1989455f..4a2a117c 100644 --- a/tests/test.py +++ b/tests/test.py @@ -40,7 +40,7 @@ class YouGetTests(unittest.TestCase): def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) - def test_soundcloud(self): + #def test_soundcloud(self): ## single song #soundcloud.download( # 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True From 5498c377ff3e8c03831ecef9defe18bea8b4937d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 15 Aug 2021 06:47:09 +0200 Subject: [PATCH 25/26] version 0.4.1545 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index dd603918..f7daa7f8 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1536' +__version__ = '0.4.1545' From 63fd9716a8740fc6862b70a474e398ca6e9f26bd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 27 Aug 2021 05:14:00 +0200 Subject: [PATCH 26/26] [universal] fix blogger --- src/you_get/extractors/universal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index abc69475..fdc7426d 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -70,12 +70,13 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg '[-_][6-9]\d\dx1\d\d\d\.jpe?g', '[-_][6-9]\d\dx[6-9]\d\d\.jpe?g', 's1600/[\w%]+\.jpe?g', # blogger + 'blogger\.googleusercontent\.com/img/a/\w*', # blogger 'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon? 
] urls = [] for i in media_exts: - urls += re.findall(r'(https?://[^ ;&"\'\\<>]+' + i + r'[^ ;&"\'\\<>]*)', page) + urls += re.findall(r'(https?://[^ ;&"\'\\<>]*' + i + r'[^ ;&"\'\\<>]*)', page) p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page) urls += [parse.unquote(url) for url in p_urls]