From 0fc63efa63c88662f363fa89768b1c1f9dd6cc36 Mon Sep 17 00:00:00 2001 From: arix00 <15333224+arix00@users.noreply.github.com> Date: Sun, 1 Jan 2023 20:38:21 -0800 Subject: [PATCH] Download multipage video collection When a collection contains more than a single page of videos, download all of them; the current code only handles the first page. This applies to 'space_channel_series' and 'space_channel_collection'. --- src/you_get/extractors/bilibili.py | 38 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 6d34c2c4..b082553e 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -747,13 +747,20 @@ class Bilibili(VideoExtractor): elif sort == 'space_channel_series': m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url) mid, sid = m.group(1), m.group(2) - api_url = self.bilibili_series_archives_api(mid, sid) - api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) - archives_info = json.loads(api_content) - # TBD: channel of more than 100 videos + pn = 1 + video_list = [] + while True: + api_url = self.bilibili_series_archives_api(mid, sid, pn) + api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) + archives_info = json.loads(api_content) + video_list.extend(archives_info['data']['archives']) + if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0: + pn += 1 + else: + break - epn, i = len(archives_info['data']['archives']), 0 - for video in archives_info['data']['archives']: + epn, i = len(video_list), 0 + for video in video_list: i += 1; log.w('Extracting %s of %s videos ...'
% (i, epn)) url = 'https://www.bilibili.com/video/av%s' % video['aid'] self.__class__().download_playlist_by_url(url, **kwargs) @@ -761,13 +768,20 @@ class Bilibili(VideoExtractor): elif sort == 'space_channel_collection': m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url) mid, sid = m.group(1), m.group(2) - api_url = self.bilibili_space_collection_api(mid, sid) - api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) - archives_info = json.loads(api_content) - # TBD: channel of more than 100 videos + pn = 1 + video_list = [] + while True: + api_url = self.bilibili_space_collection_api(mid, sid, pn) + api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) + archives_info = json.loads(api_content) + video_list.extend(archives_info['data']['archives']) + if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0: + pn += 1 + else: + break - epn, i = len(archives_info['data']['archives']), 0 - for video in archives_info['data']['archives']: + epn, i = len(video_list), 0 + for video in video_list: i += 1; log.w('Extracting %s of %s videos ...' % (i, epn)) url = 'https://www.bilibili.com/video/av%s' % video['aid'] self.__class__().download_playlist_by_url(url, **kwargs)