#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor

class Bilibili(VideoExtractor):
    name = "Bilibili"

    # Bilibili media encoding options, in descending quality order.
    stream_types = [
        {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
         'container': 'MP4', 'video_resolution': '1080p', 'desc': '高清 1080P60'},
        # 'id': 'hdflv2', 'quality': 112?
        {'id': 'flv', 'quality': 80, 'audio_quality': 30280,
         'container': 'MP4', 'video_resolution': '1080p', 'desc': '高清 1080P'},
        {'id': 'flv720_p60', 'quality': 74, 'audio_quality': 30280,
         'container': 'MP4', 'video_resolution': '720p', 'desc': '高清 720P60'},
        {'id': 'flv720', 'quality': 64, 'audio_quality': 30280,
         'container': 'MP4', 'video_resolution': '720p', 'desc': '高清 720P'},
        {'id': 'flv480', 'quality': 32, 'audio_quality': 30280,
         'container': 'MP4', 'video_resolution': '480p', 'desc': '清晰 480P'},
        {'id': 'flv360', 'quality': 16, 'audio_quality': 30216,
         'container': 'MP4', 'video_resolution': '360p', 'desc': '流畅 360P'},

        {'id': 'default', 'quality': 0}
    ]
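    # Note: 'quality' values mirror Bilibili's qn codes (and the 'id' field of
    # DASH video tracks), while 'audio_quality' matches the 'id' of DASH audio
    # tracks; prepare() indexes this table via self.stream_qualities.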

    @staticmethod
    def bilibili_headers(referer=None, cookie=None):
        # a reasonable UA
        ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
        headers = {'User-Agent': ua}
        if referer is not None:
            headers.update({'Referer': referer})
        if cookie is not None:
            headers.update({'Cookie': cookie})
        return headers
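    # prepare() calls this with cookie='CURRENT_FNVAL=16' when it wants the page
    # to expose DASH streams, and with referer=self.url when sizing/downloading.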

    @staticmethod
    def bilibili_api(avid, cid, qn=0):
        return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=16' % (avid, cid, qn)
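    # For example (avid/cid values are illustrative only), bilibili_api(170001, 279786) builds:
    #   https://api.bilibili.com/x/player/playurl?avid=170001&cid=279786&qn=0&type=&otype=json&fnver=0&fnval=16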

    @staticmethod
    def bilibili_bangumi_api(avid, cid, ep_id, qn=0):
        return 'https://api.bilibili.com/pgc/player/web/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&ep_id=%s&fnver=0&fnval=16' % (avid, cid, qn, ep_id)
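    # Same idea for bangumi episodes, e.g. bilibili_bangumi_api(170001, 279786, 123456)
    # appends ep_id=123456 to the query string (all three ids here are illustrative only).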

    def prepare(self, **kwargs):
        self.stream_qualities = {s['quality']: s for s in self.stream_types}

        html_content = get_content(self.url, headers=self.bilibili_headers())
        #self.title = match1(html_content,
        #                    r'<h1 title="([^"]+)"')

        # redirect: bangumi/play/ss -> bangumi/play/ep
        if re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ss(\d+)', self.url):
            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
            initial_state = json.loads(initial_state_text)
            ep_id = initial_state['epList'][0]['id']
            self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
            html_content = get_content(self.url, headers=self.bilibili_headers())

        # sort it out
        if re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ep(\d+)', self.url):
            sort = 'bangumi'
        elif match1(html_content, r'<meta property="og:url" content="(https://www.bilibili.com/bangumi/play/[^"]+)"'):
            sort = 'bangumi'
        elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
            sort = 'video'

        # regular av video
        if sort == 'video':
            playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>') # FIXME
            playinfo = json.loads(playinfo_text) if playinfo_text else None

            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
            initial_state = json.loads(initial_state_text)

            # warn if it is a multi-part video
            pn = initial_state['videoData']['videos']
            if pn > 1 and not kwargs.get('playlist'):
                log.w('This is a multipart video. (use --playlist to download all parts.)')

            # set video title
            self.title = initial_state['videoData']['title']
            # refine title for a specific part, if it is a multi-part video
            p = int(match1(self.url, r'[\?&]p=(\d+)') or '1') # use URL to decide p-number, not initial_state['p']
            if pn > 1:
                part = initial_state['videoData']['pages'][p - 1]['part']
                self.title = '%s (P%s. %s)' % (self.title, p, part)

            # no playinfo is found
            if playinfo is None:
                # use bilibili error video instead
                url = 'https://static.hdslb.com/error.mp4'
                _, container, size = url_info(url)
                self.streams['default'] = {'container': container, 'size': size, 'src': [url]}
                return

            # determine default quality / format
            quality = int(playinfo['data']['quality'])
            format_id = self.stream_qualities[quality]['id']
            container = self.stream_qualities[quality]['container'].lower()
            desc = self.stream_qualities[quality]['desc']

            # determine default source URL and size
            src, size = [], 0
            for durl in playinfo['data']['durl']:
                src.append(durl['url'])
                size += durl['size']
            self.streams['default'] = {'container': container, 'quality': desc, 'size': size, 'src': src}
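            # (the 'durl' entries are the page's non-DASH media, possibly split
            # into several segments; hence the list of URLs and the summed size)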

            # DASH formats
            html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
            playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>') # FIXME
            playinfo_ = json.loads(playinfo_text_)

            if 'dash' in playinfo_['data']:
                for video in playinfo_['data']['dash']['video']:
                    # prefer the latter codecs!
                    s = self.stream_qualities[video['id']]
                    format_id = s['id']
                    container = s['container'].lower()
                    desc = s['desc']
                    audio_quality = s['audio_quality']
                    baseurl = video['baseUrl']
                    size = url_size(baseurl, headers=self.bilibili_headers(referer=self.url))

                    # find matching audio track
                    audio_baseurl = playinfo_['data']['dash']['audio'][0]['baseUrl']
                    for audio in playinfo_['data']['dash']['audio']:
                        if int(audio['id']) == audio_quality:
                            audio_baseurl = audio['baseUrl']
                            break
                    size += url_size(audio_baseurl, headers=self.bilibili_headers(referer=self.url))

                    self.dash_streams[format_id] = {'container': container, 'quality': desc,
                                                    'src': [[baseurl], [audio_baseurl]], 'size': size}
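                    # 'src' holds two URL lists (video, then audio): DASH tracks
                    # are separate files, so they are fetched separately and
                    # merged after download.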

        # bangumi
        elif sort == 'bangumi':
            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
            initial_state = json.loads(initial_state_text)

            # set video title
            self.title = initial_state['h1Title']

            # warn if this bangumi has more than 1 video
            epn = len(initial_state['epList'])
            if epn > 1 and not kwargs.get('playlist'):
                log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn)

            ep_id = initial_state['epInfo']['id']
            avid = initial_state['epInfo']['aid']
            cid = initial_state['epInfo']['cid']
            api_url = self.bilibili_bangumi_api(avid, cid, ep_id)
            api_content = get_content(api_url, headers=self.bilibili_headers())
            data = json.loads(api_content)
            if data['code'] < 0: # error
                log.e(data['message'])
                return

            for video in data['result']['dash']['video']:
                # convert height to quality code
                if video['height'] == 360:
                    quality = 16
                elif video['height'] == 480:
                    quality = 32
                elif video['height'] == 720:
                    quality = 64
                elif video['height'] == 1080:
                    quality = 80
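                # (only 360/480/720/1080 are handled; any other height would
                # fall through with a stale or undefined `quality`)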
                s = self.stream_qualities[quality]
                format_id = s['id']
                container = s['container'].lower()
                desc = s['desc']
                audio_quality = s['audio_quality']
                baseurl = video['baseUrl']
                size = url_size(baseurl, headers=self.bilibili_headers(referer=self.url))

                # find matching audio track
                audio_baseurl = data['result']['dash']['audio'][0]['baseUrl']
                for audio in data['result']['dash']['audio']:
                    if int(audio['id']) == audio_quality:
                        audio_baseurl = audio['baseUrl']
                        break
                size += url_size(audio_baseurl, headers=self.bilibili_headers(referer=self.url))

                self.dash_streams[format_id] = {'container': container, 'quality': desc,
                                                'src': [[baseurl], [audio_baseurl]], 'size': size}

        else:
            # NOT IMPLEMENTED
            pass

    def extract(self, **kwargs):
        # set UA and referer for downloading
        headers = self.bilibili_headers(referer=self.url)
        self.ua, self.referer = headers['User-Agent'], headers['Referer']

        if not self.streams_sorted:
            # no stream is available
            return

        if 'stream_id' in kwargs and kwargs['stream_id']:
            # extract the stream
            stream_id = kwargs['stream_id']
            if stream_id not in self.streams and stream_id not in self.dash_streams:
                log.e('[Error] Invalid video format.')
                log.e('Run \'-i\' command with no specific video format to view all available formats.')
                exit(2)
        else:
            # extract stream with the best quality
            stream_id = self.streams_sorted[0]['id']

    def download_playlist_by_url(self, url, **kwargs):
        self.url = url
        kwargs['playlist'] = True

        html_content = get_content(self.url, headers=self.bilibili_headers())

        # sort it out
        if re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ep(\d+)', self.url):
            sort = 'bangumi'
        elif match1(html_content, r'<meta property="og:url" content="(https://www.bilibili.com/bangumi/play/[^"]+)"'):
            sort = 'bangumi'
        elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
            sort = 'video'

        # regular av video
        if sort == 'video':
            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
            initial_state = json.loads(initial_state_text)
            aid = initial_state['videoData']['aid']
            pn = initial_state['videoData']['videos']
            for pi in range(1, pn + 1):
                purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
                self.__class__().download_by_url(purl, **kwargs)

        elif sort == 'bangumi':
            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
            initial_state = json.loads(initial_state_text)
            for ep in initial_state['epList']:
                ep_id = ep['id']
                epurl = 'https://www.bilibili.com/bangumi/play/ep%s/' % ep_id
                self.__class__().download_by_url(epurl, **kwargs)

site = Bilibili()

download = site.download_by_url
download_playlist = site.download_playlist_by_url

bilibili_download = download
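# Usage sketch (a rough illustration, assuming the usual you-get keyword
# arguments such as output_dir and merge; the URLs/ids below are made up):
#
#   from you_get.extractors.bilibili import download, download_playlist
#
#   # download a single video (best available format)
#   download('https://www.bilibili.com/video/av170001', output_dir='.', merge=True)
#
#   # download every part of a multi-part video, or every episode of a bangumi
#   download_playlist('https://www.bilibili.com/bangumi/play/ep123456', output_dir='.', merge=True)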