Updated the bilibili extractor to support parsing the new site page code when using cookies

2025-03-13 11:24:02 +03:00 · 2023-03-04 17:33:19 -08:00 · 2023-03-04 17:33:19 -08:00 · 9b306bee13
commit 9b306bee13
parent 2aaa877a9b
1 changed file with 48 additions and 14 deletions
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@ -338,21 +338,47 @@ class Bilibili(VideoExtractor):
        # bangumi
        elif sort == 'bangumi':
            ep_id = ""
            avid = ""
            cid = ""
            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)')  # FIXME
-            initial_state = json.loads(initial_state_text)
+            if (initial_state_text):
                initial_state = json.loads(initial_state_text)
-            # warn if this bangumi has more than 1 video
+                # warn if this bangumi has more than 1 video
-            epn = len(initial_state['epList'])
+                epn = len(initial_state['epList'])
-            if epn > 1 and not kwargs.get('playlist'):
+                if epn > 1 and not kwargs.get('playlist'):
-                log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn)
+                    log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn)
-            # set video title
+                # set video title
-            self.title = initial_state['h1Title']
+                self.title = initial_state['h1Title']
                # construct playinfos
                ep_id = initial_state['epInfo']['id']
                avid = initial_state['epInfo']['aid']
                cid = initial_state['epInfo']['cid']
            else:
                initial_state_text = match1(html_content, r'\"episodes\":(.*?)\,\"user_status')
                pinitial_state = json.loads(initial_state_text)
                epn = len(pinitial_state)
                if epn > 1 and not kwargs.get('playlist'):
                    log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn)
                initial_state = {}
                for dic in pinitial_state:
                    if dic['link'] == self.url.rstrip('/'):
                        initial_state = dic
                        break
                self.title = initial_state['share_copy']
                # construct playinfos
                ep_id = initial_state['id']
                avid = initial_state['aid']
                cid = initial_state['cid']
            # construct playinfos
            ep_id = initial_state['epInfo']['id']
            avid = initial_state['epInfo']['aid']
            cid = initial_state['epInfo']['cid']
            playinfos = []
            api_url = self.bilibili_bangumi_api(avid, cid, ep_id)
            api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
@ -716,10 +742,18 @@ class Bilibili(VideoExtractor):
                                self.download(**kwargs)
        elif sort == 'bangumi':
            episodes = []
            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)')  # FIXME
-            initial_state = json.loads(initial_state_text)
+            if (initial_state_text):
-            epn, i = len(initial_state['epList']), 0
+                # initial_state = json.loads(initial_state_text)
-            for ep in initial_state['epList']:
+                episodes = json.loads(initial_state_text)['epList']
            else:
                initial_state_text = match1(html_content, r'\"episodes\":(.*?)\,\"user_status')
                # initial_state = json.loads(initial_state_text)
                episodes = json.loads(initial_state_text)
            epn, i = len(episodes), 0
            for ep in episodes:
                i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
                ep_id = ep['id']
                epurl = 'https://www.bilibili.com/bangumi/play/ep%s/' % ep_id