Update bilibili.py

This commit is contained in:
jpppppppppppppppppppppppp 2023-03-18 00:14:06 +08:00 committed by GitHub
parent 2aaa877a9b
commit e3b41cebad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -9,7 +9,7 @@ import math
class Bilibili(VideoExtractor): class Bilibili(VideoExtractor):
name = "Bilibili" name = "Bilibili"
epid = ""
# Bilibili media encoding options, in descending quality order. # Bilibili media encoding options, in descending quality order.
stream_types = [ stream_types = [
{'id': 'hdflv2_8k', 'quality': 127, 'audio_quality': 30280, {'id': 'hdflv2_8k', 'quality': 127, 'audio_quality': 30280,
@@ -42,8 +42,6 @@ class Bilibili(VideoExtractor):
{'id': 'jpg', 'quality': 0}, {'id': 'jpg', 'quality': 0},
] ]
codecids = {7: 'AVC', 12: 'HEVC', 13: 'AV1'}
@staticmethod @staticmethod
def height_to_quality(height, qn): def height_to_quality(height, qn):
if height <= 360 and qn <= 16: if height <= 360 and qn <= 16:
@@ -72,7 +70,7 @@ class Bilibili(VideoExtractor):
@staticmethod @staticmethod
def bilibili_api(avid, cid, qn=0): def bilibili_api(avid, cid, qn=0):
return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=4048&fourk=1' % (avid, cid, qn) return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=16&fourk=1' % (avid, cid, qn)
@staticmethod @staticmethod
def bilibili_audio_api(sid): def bilibili_audio_api(sid):
@@ -117,7 +115,7 @@ class Bilibili(VideoExtractor):
@staticmethod @staticmethod
def bilibili_space_channel_api(mid, cid, pn=1, ps=100): def bilibili_space_channel_api(mid, cid, pn=1, ps=100):
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps) return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
@staticmethod @staticmethod
def bilibili_space_collection_api(mid, cid, pn=1, ps=30): def bilibili_space_collection_api(mid, cid, pn=1, ps=30):
return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps) return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps)
@@ -125,7 +123,7 @@ class Bilibili(VideoExtractor):
@staticmethod @staticmethod
def bilibili_series_archives_api(mid, sid, pn=1, ps=100): def bilibili_series_archives_api(mid, sid, pn=1, ps=100):
return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps) return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps)
@staticmethod @staticmethod
def bilibili_space_favlist_api(fid, pn=1, ps=20): def bilibili_space_favlist_api(fid, pn=1, ps=20):
return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps) return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
@@ -172,9 +170,9 @@ class Bilibili(VideoExtractor):
# redirect: bangumi.bilibili.com/anime -> bangumi/play/ep # redirect: bangumi.bilibili.com/anime -> bangumi/play/ep
elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ss(\d+)', self.url) or \ elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ss(\d+)', self.url) or \
re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)/play', self.url): re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)/play', self.url):
initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME initial_state_text = '{' + match1(html_content, r'("initEpList":.*?),"initSections"') + '}' # FIXME
initial_state = json.loads(initial_state_text) initial_state = json.loads(initial_state_text)
ep_id = initial_state['epList'][0]['id'] ep_id = initial_state['initEpList'][0]['id']
self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url)) html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
@@ -193,6 +191,7 @@ class Bilibili(VideoExtractor):
sort = 'audio' sort = 'audio'
elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ep(\d+)', self.url): elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ep(\d+)', self.url):
sort = 'bangumi' sort = 'bangumi'
self.epid = re.findall(r'https?://(www\.)?bilibili\.com/bangumi/play/ep(\d+)', self.url)[0][1]
elif match1(html_content, r'<meta property="og:url" content="(https://www.bilibili.com/bangumi/play/[^"]+)"'): elif match1(html_content, r'<meta property="og:url" content="(https://www.bilibili.com/bangumi/play/[^"]+)"'):
sort = 'bangumi' sort = 'bangumi'
elif re.match(r'https?://live\.bilibili\.com/', self.url): elif re.match(r'https?://live\.bilibili\.com/', self.url):
@@ -224,10 +223,6 @@ class Bilibili(VideoExtractor):
if 'videoData' in initial_state: if 'videoData' in initial_state:
# (standard video) # (standard video)
# warn if cookies are not loaded
if cookies is None:
log.w('You will need login cookies for 720p formats or above. (use --cookies to load cookies.txt.)')
# warn if it is a multi-part video # warn if it is a multi-part video
pn = initial_state['videoData']['videos'] pn = initial_state['videoData']['videos']
if pn > 1 and not kwargs.get('playlist'): if pn > 1 and not kwargs.get('playlist'):
@@ -308,10 +303,11 @@ class Bilibili(VideoExtractor):
if 'dash' in playinfo['data']: if 'dash' in playinfo['data']:
audio_size_cache = {} audio_size_cache = {}
for video in playinfo['data']['dash']['video']: for video in playinfo['data']['dash']['video']:
# prefer the latter codecs!
s = self.stream_qualities[video['id']] s = self.stream_qualities[video['id']]
format_id = f"dash-{s['id']}-{self.codecids[video['codecid']]}" # prefix format_id = 'dash-' + s['id'] # prefix
container = 'mp4' # enforce MP4 container container = 'mp4' # enforce MP4 container
desc = s['desc'] + ' ' + video['codecs'] desc = s['desc']
audio_quality = s['audio_quality'] audio_quality = s['audio_quality']
baseurl = video['baseUrl'] baseurl = video['baseUrl']
size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url)) size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url))
@@ -338,21 +334,24 @@ class Bilibili(VideoExtractor):
# bangumi # bangumi
elif sort == 'bangumi': elif sort == 'bangumi':
initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME initial_state_text = '{' + match1(html_content, r'("initEpList":.*?),"initSections"') + '}'
initial_state = json.loads(initial_state_text) initial_state = json.loads(initial_state_text)
# warn if this bangumi has more than 1 video # warn if this bangumi has more than 1 video
epn = len(initial_state['epList']) epn = len(initial_state['initEpList'])
if epn > 1 and not kwargs.get('playlist'): if epn > 1 and not kwargs.get('playlist'):
log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn) log.w('This bangumi currently has %d videos. (use --playlist to download all videos.)' % epn)
# set video title # set video title
self.title = initial_state['h1Title'] for i in range(epn):
if int(initial_state['initEpList'][i]['id']) == int(self.epid):
break
self.title = initial_state['initEpList'][i]['share_copy']
# construct playinfos # construct playinfos
ep_id = initial_state['epInfo']['id'] ep_id = self.epid
avid = initial_state['epInfo']['aid'] avid = initial_state['initEpList'][i]['aid']
cid = initial_state['epInfo']['cid'] cid = initial_state['initEpList'][i]['cid']
playinfos = [] playinfos = []
api_url = self.bilibili_bangumi_api(avid, cid, ep_id) api_url = self.bilibili_bangumi_api(avid, cid, ep_id)
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
@@ -752,20 +751,13 @@ class Bilibili(VideoExtractor):
elif sort == 'space_channel_series': elif sort == 'space_channel_series':
m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url) m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url)
mid, sid = m.group(1), m.group(2) mid, sid = m.group(1), m.group(2)
pn = 1 api_url = self.bilibili_series_archives_api(mid, sid)
video_list = [] api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
while True: archives_info = json.loads(api_content)
api_url = self.bilibili_series_archives_api(mid, sid, pn) # TBD: channel of more than 100 videos
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
archives_info = json.loads(api_content)
video_list.extend(archives_info['data']['archives'])
if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
pn += 1
else:
break
epn, i = len(video_list), 0 epn, i = len(archives_info['data']['archives']), 0
for video in video_list: for video in archives_info['data']['archives']:
i += 1; log.w('Extracting %s of %s videos ...' % (i, epn)) i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
url = 'https://www.bilibili.com/video/av%s' % video['aid'] url = 'https://www.bilibili.com/video/av%s' % video['aid']
self.__class__().download_playlist_by_url(url, **kwargs) self.__class__().download_playlist_by_url(url, **kwargs)
@@ -773,20 +765,13 @@ class Bilibili(VideoExtractor):
elif sort == 'space_channel_collection': elif sort == 'space_channel_collection':
m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url) m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url)
mid, sid = m.group(1), m.group(2) mid, sid = m.group(1), m.group(2)
pn = 1 api_url = self.bilibili_space_collection_api(mid, sid)
video_list = [] api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
while True: archives_info = json.loads(api_content)
api_url = self.bilibili_space_collection_api(mid, sid, pn) # TBD: channel of more than 100 videos
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
archives_info = json.loads(api_content)
video_list.extend(archives_info['data']['archives'])
if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
pn += 1
else:
break
epn, i = len(video_list), 0 epn, i = len(archives_info['data']['archives']), 0
for video in video_list: for video in archives_info['data']['archives']:
i += 1; log.w('Extracting %s of %s videos ...' % (i, epn)) i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
url = 'https://www.bilibili.com/video/av%s' % video['aid'] url = 'https://www.bilibili.com/video/av%s' % video['aid']
self.__class__().download_playlist_by_url(url, **kwargs) self.__class__().download_playlist_by_url(url, **kwargs)