Download multipage video collection

When a collection contains more than one page of videos, download all of them; the current code only handles the first page. This applies to 'space_channel_series' and 'space_channel_collection'.
2025-01-23 13:35:16 +03:00 · 2023-01-01 20:38:21 -08:00 · 2023-01-01 20:38:21 -08:00 · 0fc63efa63
commit 0fc63efa63
parent c0a483dab1
1 changed files with 26 additions and 12 deletions
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@ -747,13 +747,20 @@ class Bilibili(VideoExtractor):
        elif sort == 'space_channel_series':
            m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url)
            mid, sid = m.group(1), m.group(2)
-            api_url = self.bilibili_series_archives_api(mid, sid)
-            api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
-            archives_info = json.loads(api_content)
-            # TBD: channel of more than 100 videos
+            pn = 1
+            video_list = []
+            while True:
+                api_url = self.bilibili_series_archives_api(mid, sid, pn)
+                api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
+                archives_info = json.loads(api_content)
+                video_list.extend(archives_info['data']['archives'])
+                if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
+                    pn += 1
+                else:
+                    break

-            epn, i = len(archives_info['data']['archives']), 0
-            for video in archives_info['data']['archives']:
+            epn, i = len(video_list), 0
+            for video in video_list:
                i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
                url = 'https://www.bilibili.com/video/av%s' % video['aid']
                self.__class__().download_playlist_by_url(url, **kwargs)
@ -761,13 +768,20 @@ class Bilibili(VideoExtractor):
        elif sort == 'space_channel_collection':
            m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url)
            mid, sid = m.group(1), m.group(2)
-            api_url = self.bilibili_space_collection_api(mid, sid)
-            api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
-            archives_info = json.loads(api_content)
-            # TBD: channel of more than 100 videos
+            pn = 1
+            video_list = []
+            while True:
+                api_url = self.bilibili_space_collection_api(mid, sid, pn)
+                api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
+                archives_info = json.loads(api_content)
+                video_list.extend(archives_info['data']['archives'])
+                if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
+                    pn += 1
+                else:
+                    break

-            epn, i = len(archives_info['data']['archives']), 0
-            for video in archives_info['data']['archives']:
+            epn, i = len(video_list), 0
+            for video in video_list:
                i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
                url = 'https://www.bilibili.com/video/av%s' % video['aid']
                self.__class__().download_playlist_by_url(url, **kwargs)