#!/usr/bin/env python

__all__ = ['bilibili_download']

from ..common import *

from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .youku import youku_download_by_vid

import hashlib
import re

appkey = '8e9fc618fbd41e28'

def get_srt_xml(id):
    url = 'http://comment.bilibili.com/%s.xml' % id
    return get_html(url)
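# Each danmaku's "p" attribute packs eight comma-separated fields; a worked
# example of the parser below (field values hypothetical):
#   parse_srt_p('23.826,1,25,16777215,1312863760,0,efbd0e25,42759017')
#   # => (0, 1, 25, '#ffffff'): normal pool, scrolling, font size 25, white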
def parse_srt_p(p):
    fields = p.split(',')
    assert len(fields) == 8, fields
    time, mode, font_size, font_color, pub_time, pool, user_id, history = fields
    time = float(time)

    mode = int(mode)
    assert 1 <= mode <= 8
    # mode 1~3: scrolling
    # mode 4: bottom
    # mode 5: top
    # mode 6: reverse?
    # mode 7: position
    # mode 8: advanced

    pool = int(pool)
    assert 0 <= pool <= 2
    # pool 0: normal
    # pool 1: srt
    # pool 2: special?

    font_size = int(font_size)

    font_color = '#%06x' % int(font_color)

    return pool, mode, font_size, font_color
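# Sketch of the comment XML walked below: one <d> element per on-screen
# comment (attribute values hypothetical), e.g.
#   <d p="23.826,1,25,16777215,1312863760,0,efbd0e25,42759017">comment text</d>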
def parse_srt_xml(xml):
    d = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
    for x, y in d:
        # x holds the "p" attribute, y the comment text.
        p = parse_srt_p(x)
    # Conversion to SRT is not implemented yet.
    raise NotImplementedError()
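# The playurl response parsed below is expected to be shaped roughly like
# this (sketch; tags other than <durl>/<url> omitted):
#   <video><durl><order>1</order><url>http://example.com/1.flv</url></durl></video>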
def parse_cid_playurl(xml):
    from xml.dom.minidom import parseString
    try:
        doc = parseString(xml.encode('utf-8'))
        urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue
                for durl in doc.getElementsByTagName('durl')]
        return urls
    except Exception:
        # Malformed or empty XML: report no downloadable URLs.
        return []
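# Fetch and download all parts of a video given their cids. Direct use
# (cid values hypothetical):
#   bilibili_download_by_cids(['1176840', '1176841'], 'some title', info_only=True)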
def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
    urls = []
    for cid in cids:
        url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid
        # Rewrite *.qqvideo.tc.qq.com hosts to vsrc.store.qq.com.
        urls += [i
                 if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
                 else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
                 for i in parse_cid_playurl(get_content(url))]

    type_ = ''
    size = 0
    for url in urls:
        _, type_, temp = url_info(url)
        size += temp or 0

    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
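# Single-cid variant of the above; it additionally catches DNS resolution
# failures (error.URLError) instead of crashing mid-probe.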
def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
    url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid
    urls = [i
            if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
            else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
            for i in parse_cid_playurl(get_content(url))]

    type_ = ''
    size = 0
    try:
        for url in urls:
            _, type_, temp = url_info(url)
            size += temp or 0
    except error.URLError:
        log.wtf('[Failed] DNS not resolved. Please change your DNS server settings.')

    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
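# Live streams go through a separate playurl API and have no known total
# size, so size is reported as 0 and each stream URL is tried on its own.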
def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
    api_url = 'http://live.bilibili.com/api/playurl?cid=' + cid
    urls = parse_cid_playurl(get_content(api_url))

    for url in urls:
        _, type_, _ = url_info(url)
        size = 0
        print_info(site_info, title, type_, size)
        if not info_only:
            download_urls([url], title, type_, total_size=None, output_dir=output_dir, merge=merge)
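# Entry point: scrape the page for a title and a player parameter, then
# dispatch to the native, Sina, Youku, or Tudou downloader accordingly.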
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)

    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',
                   r'<h1[^>]*>([^<>]+)</h1>'], html)
    if title:
        title = unescape_html(title)
        title = escape_file_path(title)

    flashvars = r1_of([r'(cid=\d+)',
                       r'(cid: \d+)',
                       r'flashvars="([^"]+)"',
                       r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    flashvars = flashvars.replace(': ', '=')
    t, cid = flashvars.split('=', 1)
    cid = cid.split('&')[0]
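    # flashvars resolves to a key=value pair; the key picks the backend:
    # cid (native player), vid (Sina), ykid (Youku), uid (Tudou).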
    if t == 'cid':
        if re.match(r'https?://live\.bilibili\.com/', url):
            title = r1(r'<title>([^<>]+)</title>', html)
            bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
        else:
            # multi-P: collect one cid per page listed in the part selector
            cids = []
            pages = re.findall('<option value=\'([^\']*)\'', html)
            titles = re.findall('<option value=.*>(.+)</option>', html)
            for page in pages:
                html = get_html("http://www.bilibili.com%s" % page)
                flashvars = r1_of([r'(cid=\d+)',
                                   r'flashvars="([^"]+)"',
                                   r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
                if flashvars:
                    t, cid = flashvars.split('=', 1)
                    cids.append(cid.split('&')[0])

            # no multi-P: fall back to the single cid found above
            if not pages:
                cids = [cid]
                titles = [r1(r'<option value=.* selected>(.+)</option>', html) or title]

            for i in range(len(cids)):
                bilibili_download_by_cid(cids[i],
                                         titles[i],
                                         output_dir=output_dir,
                                         merge=merge,
                                         info_only=info_only)

    elif t == 'vid':
        sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'ykid':
        youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'uid':
        tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        raise NotImplementedError(flashvars)

    if not info_only and not dry_run:
        # Use .get() so callers that omit the 'caption' kwarg simply skip danmaku.
        if not kwargs.get('caption'):
            print('Skipping danmaku.')
            return
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.xml'))
        xml = get_srt_xml(cid)
        with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
            x.write(xml)

site_info = "bilibili.com"
download = bilibili_download
download_playlist = bilibili_download
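# Minimal sketch of a typical call (URL hypothetical; pass caption=True to
# also save the danmaku XML alongside the video):
#   bilibili_download('http://www.bilibili.com/video/av810872/',
#                     output_dir='.', merge=True, info_only=False, caption=True)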