#!/usr/bin/env python
__all__ = ['bilibili_download']
import hashlib
import re
import time
import json
import http.cookiejar
import urllib.request
import urllib.parse
from xml.dom.minidom import parseString
from ..common import *
from ..util.log import *
from ..extractor import *
from .qq import qq_download_by_vid
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .youku import youku_download_by_vid
class Bilibili(VideoExtractor):
# Human-readable site name for this extractor.
name = 'Bilibili'

# Endpoints. The ones containing '{}' are str.format templates; the ones
# ending in '?' get a signed query string appended (see api_req).
live_api = 'http://live.bilibili.com/api/playurl?cid={}&otype=json'
api_url = 'http://interface.bilibili.com/playurl?'
bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}'
live_room_info_api_url = 'https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}'

# Secrets appended to the query string before MD5-signing a request:
# SEC1 for api_url requests, SEC2 for bangumi_api_url requests.
SEC1 = '1c15888dc316e05a15fdd0a02ed6584f'
SEC2 = '9b288147e5474dd2aa67085f716c560d'

# Known stream ids, in declaration order.
stream_types = [
    {'id': stream_id}
    for stream_id in ('hdflv', 'flv720', 'flv', 'hdmp4', 'mp4', 'live', 'vc')
]

# Numeric level per stream id (presumably the API's quality scale -- confirm).
fmt2qlt = {'hdflv': 4, 'flv': 3, 'hdmp4': 2, 'mp4': 1}
@staticmethod
def bilibili_stream_type(urls):
url = urls[0]
if 'hd.flv' in url or '-112.flv' in url:
return 'hdflv', 'flv'
if '-64.flv' in url:
return 'flv720', 'flv'
if '.flv' in url:
return 'flv', 'flv'
if 'hd.mp4' in url or '-48.mp4' in url:
return 'hdmp4', 'mp4'
if '.mp4' in url:
return 'mp4', 'mp4'
raise Exception('Unknown stream type')
def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs):
    """Request the signed play-URL document for one cid/quality pair.

    Args:
        cid: Value placed in the ``cid`` query parameter.
        quality: Value placed in the ``quality`` query parameter.
        bangumi: Truthy to use the bangumi endpoint and SEC2, otherwise
            the regular endpoint and SEC1.
        bangumi_movie: With ``bangumi``, selects ``module=movie`` instead
            of ``module=bangumi``.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``get_content`` returns for the signed URL.
    """
    timestamp = str(int(time.time()))
    if bangumi:
        module = 'movie' if bangumi_movie else 'bangumi'
        query = 'cid={}&module={}&player=1&quality={}&ts={}'.format(cid, module, quality, timestamp)
        secret, endpoint = self.SEC2, self.bangumi_api_url
    else:
        query = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, timestamp)
        secret, endpoint = self.SEC1, self.api_url

    # The signature is the MD5 hex digest of the query string with the
    # secret appended.
    signature = hashlib.md5((query + secret).encode('utf8')).hexdigest()
    request_headers = {
        'referer': self.url,
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    }
    return get_content(endpoint + query + '&sign=' + signature, headers=request_headers)
def parse_bili_xml(self, xml_str):
urls_list = []
total_size = 0
doc = parseString(xml_str.encode('utf8'))
durls = doc.getElementsByTagName('durl')
for durl in durls:
size = durl.getElementsByTagName('size')[0]
total_size += int(size.firstChild.nodeValue)
url = durl.getElementsByTagName('url')[0]
urls_list.append(url.firstChild.nodeValue)
stream_type, container = self.bilibili_stream_type(urls_list)
if stream_type not in self.streams:
self.streams[stream_type] = {}
self.streams[stream_type]['src'] = urls_list
self.streams[stream_type]['size'] = total_size
self.streams[stream_type]['container'] = container
def download_by_vid(self, cid, bangumi, **kwargs):
    """Collect the streams available for *cid* and, when needed, its danmaku.

    The play-URL API is queried once per quality level, from 4 down to 0,
    and each response is handed to ``parse_bili_xml``, which records the
    stream in ``self.streams``.

    Args:
        cid: Content id forwarded to ``api_req`` and ``get_danmuku_xml``.
        bangumi: Forwarded to ``api_req`` to select the bangumi endpoint.
        **kwargs: Forwarded to ``api_req``. ``stream_id`` and ``info_only``
            are also read here to decide whether danmaku is fetched.
    """
    stream_id = kwargs.get('stream_id')
    info_only = kwargs.get('info_only')

    # Every quality level is always queried; the requested stream_id does
    # not narrow the requests made here.
    for qlt in range(4, -1, -1):
        self.parse_bili_xml(self.api_req(cid, qlt, bangumi, **kwargs))

    # Danmaku is skipped only for an info-only run with no stream_id given.
    if stream_id or not info_only:
        self.danmuku = get_danmuku_xml(cid)
# Normalise self.url, fetch the page into self.page, extract self.title,
# then hand off to the *_entry method that matches the URL.
# **kwargs is forwarded unchanged to the chosen entry method; the only key
# read here is the optional 'subtitle'.
def prepare(self, **kwargs):
# A default timeout of exactly 600 is treated as "none specified" and is
# replaced by 2 seconds.
# NOTE(review): `socket` is not imported by name in the visible imports;
# presumably it arrives through one of the wildcard imports -- confirm.
if socket.getdefaulttimeout() == 600: # no timeout specified
socket.setdefaulttimeout(2) # fail fast, very speedy!
# handle "watchlater" URLs
# Rewrite them to the plain video page for the same av number.
if '/watchlater/' in self.url:
aid = re.search(r'av(\d+)', self.url).group(1)
self.url = 'http://www.bilibili.com/video/av{}/'.format(aid)
self.ua = fake_headers['User-Agent']
# presumably resolves redirects to the final location -- verify against
# url_locations, which is defined outside this file.
self.url = url_locations([self.url])[0]
frag = urllib.parse.urlparse(self.url).fragment
# http://www.bilibili.com/video/av3141144/index_2.html#page=3
# A "page=N" fragment takes precedence: the URL is rebuilt as index_N.html.
if frag:
hit = re.search(r'page=(\d+)', frag)
if hit is not None:
page = hit.group(1)
aid = re.search(r'av(\d+)', self.url).group(1)
self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, page)
self.referer = self.url
self.page = get_content(self.url)
# Title extraction: first of two patterns that matches the page wins.
# NOTE(review): the two pattern literals on the next three lines look
# truncated in this copy (a raw string cannot span lines); they have
# probably lost HTML markup -- restore them from upstream before relying
# on this code.
m = re.search(r'
(.*?)', self.page) or re.search(r'
', self.page)
if m is not None:
self.title = m.group(1)
# Fall back to the og:title meta tag when no title has been set.
if self.title is None:
m = re.search(r'property="og:title" content="([^"]+)"', self.page)
if m is not None:
self.title = m.group(1)
# An optional caller-supplied subtitle is appended after a space.
if 'subtitle' in kwargs:
subtitle = kwargs['subtitle']
self.title = '{} {}'.format(self.title, subtitle)
# Dispatch on the URL. Order matters: the more specific
# 'bangumi.bilibili.com/movie' must be tested before 'bangumi.bilibili.com'.
if 'bangumi.bilibili.com/movie' in self.url:
self.movie_entry(**kwargs)
elif 'bangumi.bilibili.com' in self.url:
self.bangumi_entry(**kwargs)
elif 'bangumi/' in self.url:
self.bangumi_entry(**kwargs)
elif 'live.bilibili.com' in self.url:
self.live_entry(**kwargs)
elif 'vc.bilibili.com' in self.url:
self.vc_entry(**kwargs)
else:
# Anything else is handled as a regular video page.
self.entry(**kwargs)
def movie_entry(self, **kwargs):
    """Handle a bangumi movie page.

    Reads the ``aid`` from a ``var aid = '...'`` assignment in
    ``self.page``, fetches the page list for it, takes the first entry's
    ``pagename`` as the title, and downloads that entry's ``cid`` as a
    bangumi movie.

    Args:
        **kwargs: Forwarded unchanged to ``download_by_vid``.
    """
    aid_match = re.search(r"var\s*aid\s*=\s*'(\d+)'", self.page)
    aid = aid_match.group(1)
    listing_url = 'http://www.bilibili.com/widget/getPageList?aid={}'.format(aid)
    pages = json.loads(get_content(listing_url))
    # better ideas for bangumi_movie titles?
    first_page = pages[0]
    self.title = first_page['pagename']
    self.download_by_vid(first_page['cid'], True, bangumi_movie=True, **kwargs)
def entry(self, **kwargs):
# tencent player
tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page)
if tc_flashvars:
tc_flashvars = tc_flashvars.group(1)
if tc_flashvars is not None:
self.out = True
qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only'])
return
has_plist = re.search(r'