diff --git a/.gitignore b/.gitignore index 0888e5ab..99b18775 100644 --- a/.gitignore +++ b/.gitignore @@ -83,3 +83,9 @@ _* /.idea *.m4a *.DS_Store +*.txt + +*.zip + +.vscode + diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 2710b332..e5ddbafc 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -123,7 +123,7 @@ class Bilibili(VideoExtractor): self.stream_qualities = {s['quality']: s for s in self.stream_types} try: - html_content = get_content(self.url, headers=self.bilibili_headers()) + html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url)) except: html_content = '' # live always returns 400 (why?) #self.title = match1(html_content, @@ -255,17 +255,21 @@ class Bilibili(VideoExtractor): size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url)) # find matching audio track - audio_baseurl = playinfo['data']['dash']['audio'][0]['baseUrl'] - for audio in playinfo['data']['dash']['audio']: - if int(audio['id']) == audio_quality: - audio_baseurl = audio['baseUrl'] - break - if not audio_size_cache.get(audio_quality, False): - audio_size_cache[audio_quality] = self.url_size(audio_baseurl, headers=self.bilibili_headers(referer=self.url)) - size += audio_size_cache[audio_quality] + if playinfo['data']['dash']['audio']: + audio_baseurl = playinfo['data']['dash']['audio'][0]['baseUrl'] + for audio in playinfo['data']['dash']['audio']: + if int(audio['id']) == audio_quality: + audio_baseurl = audio['baseUrl'] + break + if not audio_size_cache.get(audio_quality, False): + audio_size_cache[audio_quality] = self.url_size(audio_baseurl, headers=self.bilibili_headers(referer=self.url)) + size += audio_size_cache[audio_quality] - self.dash_streams[format_id] = {'container': container, 'quality': desc, - 'src': [[baseurl], [audio_baseurl]], 'size': size} + self.dash_streams[format_id] = {'container': container, 'quality': desc, + 'src': [[baseurl], 
def prepare_by_cid(self, avid, cid, title, html_content, playinfo, playinfo_, url):
    """Collect streams and danmaku for a single part identified by its cid.

    Written mainly for interactive videos, whose parts are told apart by
    cid rather than by URL.

    Args:
        avid: video id, forwarded to ``self.bilibili_api``.
        cid: part id, used for the API lookups and the danmaku URL.
        title: stored as ``self.title``.
        html_content: kept for interface compatibility; not read here.
        playinfo: primary play-info dict (with a ``'data'`` entry) or None.
        playinfo_: secondary play-info dict or None.
        url: stored as ``self.url`` and sent as referer when probing sizes.

    Side effects:
        Fills ``self.streams`` (durl formats), ``self.dash_streams`` (DASH
        formats) and ``self.danmaku``. When no play-info can be obtained at
        all, registers the static error video under ``'flv480'`` and
        returns without fetching danmaku.
    """
    self.stream_qualities = {s['quality']: s for s in self.stream_types}
    self.title = title
    self.url = url

    current_quality, best_quality = None, None
    if playinfo is not None:
        page_data = playinfo['data']
        # 0 indicates an error, fall back to None
        current_quality = page_data['quality'] or None
        # Truthiness covers a missing, null or empty list alike.
        if page_data.get('accept_quality'):
            best_quality = page_data['accept_quality'][0]

    playinfos = [info for info in (playinfo, playinfo_) if info is not None]

    # Default so the fallback below always has something to log.
    message = 'no playable stream found'

    # get alternative formats from API
    for qn in [80, 64, 32, 16]:
        # automatic format for durl: qn=0
        # for dash, qn does not matter
        if current_quality is None or qn < current_quality:
            api_url = self.bilibili_api(avid, cid, qn=qn)
            api_content = get_content(api_url, headers=self.bilibili_headers())
            api_playinfo = json.loads(api_content)
            if api_playinfo['code'] == 0:  # success
                playinfos.append(api_playinfo)
            else:
                # Error payloads may lack a 'data' object; fall back to a
                # top-level message rather than raising here.
                error_data = api_playinfo.get('data')
                if not isinstance(error_data, dict):
                    error_data = {}
                message = (error_data.get('message')
                           or api_playinfo.get('message')
                           or message)
        if best_quality is None or qn <= best_quality:
            api_url = self.bilibili_interface_api(cid, qn=qn)
            api_content = get_content(api_url, headers=self.bilibili_headers())
            api_playinfo_data = json.loads(api_content)
            if api_playinfo_data.get('quality'):
                playinfos.append({'code': 0, 'message': '0', 'ttl': 1, 'data': api_playinfo_data})

    if not playinfos:
        log.w(message)
        # use bilibili error video instead
        url = 'https://static.hdslb.com/error.mp4'
        _, container, size = url_info(url)
        self.streams['flv480'] = {'container': container, 'size': size, 'src': [url]}
        return

    for info in playinfos:
        data = info['data']

        if 'durl' in data:
            # The top-level quality only describes durl streams, so it is
            # resolved here and cannot break DASH-only play-infos.
            quality = data.get('quality')
            spec = self.stream_qualities.get(quality)
            if spec is None:
                log.w('unknown stream quality: %s' % quality)
            else:
                src = [durl['url'] for durl in data['durl']]
                size = sum(durl['size'] for durl in data['durl'])
                self.streams[spec['id']] = {'container': spec['container'].lower(),
                                            'quality': spec['desc'],
                                            'size': size, 'src': src}

        # DASH formats
        if 'dash' in data:
            dash = data['dash']
            audio_tracks = dash.get('audio')
            audio_size_cache = {}
            for video in dash['video']:
                # prefer the latter codecs! (a later entry with the same id
                # overwrites the earlier one in self.dash_streams)
                spec = self.stream_qualities.get(video['id'])
                if spec is None:
                    log.w('unknown stream quality: %s' % video['id'])
                    continue
                format_id = 'dash-' + spec['id']  # prefix
                baseurl = video['baseUrl']
                size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url))
                src = [[baseurl]]

                # find matching audio track; some videos carry none
                if audio_tracks:
                    audio_quality = spec['audio_quality']
                    audio_baseurl = audio_tracks[0]['baseUrl']
                    for audio in audio_tracks:
                        if int(audio['id']) == audio_quality:
                            audio_baseurl = audio['baseUrl']
                            break
                    if not audio_size_cache.get(audio_quality, False):
                        audio_size_cache[audio_quality] = self.url_size(
                            audio_baseurl, headers=self.bilibili_headers(referer=self.url))
                    size += audio_size_cache[audio_quality]
                    src.append([audio_baseurl])

                # enforce MP4 container for DASH
                self.dash_streams[format_id] = {'container': 'mp4', 'quality': spec['desc'],
                                                'src': src, 'size': size}

    # get danmaku
    self.danmaku = get_content('http://comment.bilibili.com/%s.xml' % cid)
self.bilibili_headers(referer=self.url) @@ -474,9 +570,66 @@ class Bilibili(VideoExtractor): initial_state = json.loads(initial_state_text) aid = initial_state['videoData']['aid'] pn = initial_state['videoData']['videos'] - for pi in range(1, pn + 1): - purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi) - self.__class__().download_by_url(purl, **kwargs) + if pn!= len(initial_state['videoData']['pages']):#interaction video 互动视频 + search_node_list = [] + download_cid_set = set([initial_state['videoData']['cid']]) + params = { + 'id': 'cid:{}'.format(initial_state['videoData']['cid']), + 'aid': str(aid) + } + urlcontent = get_content('https://api.bilibili.com/x/player.so?'+parse.urlencode(params), headers=self.bilibili_headers(referer='https://www.bilibili.com/video/av{}'.format(aid))) + graph_version = json.loads(urlcontent[urlcontent.find('')+13:urlcontent.find('')])['graph_version'] + params = { + 'aid': str(aid), + 'graph_version': graph_version, + 'platform': 'pc', + 'portal': 0, + 'screen': 0, + } + node_info = json.loads(get_content('https://api.bilibili.com/x/stein/nodeinfo?'+parse.urlencode(params))) + + playinfo_text = match1(html_content, r'__playinfo__=(.*?)