2012-09-02 00:02:14 +04:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
__all__ = ['bilibili_download']
|
|
|
|
|
|
|
|
from ..common import *
|
|
|
|
|
|
|
|
from .sina import sina_download_by_id
|
|
|
|
from .tudou import tudou_download_by_id
|
|
|
|
from .youku import youku_download_by_id
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
def get_srt_xml(id):
    """Fetch the comment XML document for *id* from comment.bilibili.tv.

    The request goes to ``http://comment.bilibili.tv/<id>.xml`` and whatever
    ``get_html`` returns for that URL is handed back unchanged.
    """
    return get_html('http://comment.bilibili.tv/%s.xml' % id)
|
|
|
|
|
|
|
|
def parse_srt_p(p):
    """Parse the comma-separated ``p`` attribute of one comment ``<d>`` element.

    The attribute must hold exactly eight fields, in this order: time, mode,
    font size, font colour, publication time, pool, user id, history.

    Returns a tuple ``(pool, mode, font_size, font_color)`` where the first
    three items are ints and ``font_color`` is a ``'#rrggbb'``-style hex
    string built from the integer colour value.

    Raises AssertionError when the field count is not 8 or when mode/pool is
    out of range, and ValueError when a numeric field cannot be converted.
    """
    parts = p.split(',')
    assert len(parts) == 8, parts
    (raw_time, raw_mode, raw_size, raw_color,
     _pub_time, raw_pool, _user_id, _history) = parts

    # The timestamp is not part of the result, but converting it still
    # rejects attributes whose first field is not a number.
    float(raw_time)

    # mode 1~3: scrolling
    # mode 4: bottom
    # mode 5: top
    # mode 6: reverse?
    # mode 7: position
    # mode 8: advanced
    mode = int(raw_mode)
    assert 1 <= mode <= 8

    # pool 0: normal
    # pool 1: srt
    # pool 2: special?
    pool = int(raw_pool)
    assert 0 <= pool <= 2

    size = int(raw_size)
    color = '#%06x' % int(raw_color)

    return pool, mode, size, color
|
|
|
|
|
|
|
|
def parse_srt_xml(xml):
    """Validate the comments in a comment XML document (unfinished).

    Every ``<d p="...">text</d>`` element found in *xml* has its ``p``
    attribute run through ``parse_srt_p``, so malformed attributes surface
    as that function's errors. Nothing is converted or returned yet: the
    function ends by raising NotImplementedError.
    """
    # NOTE(review): indentation was lost in the reviewed copy; the raise is
    # taken to sit after the loop (not inside it) - confirm against history.
    for attributes, _text in re.findall(r'<d p="([^"]+)">(.*)</d>', xml):
        parse_srt_p(attributes)
    raise NotImplementedError()
|
|
|
|
|
2012-09-16 12:50:35 +04:00
|
|
|
def parse_cid_playurl(xml):
    """Extract the media URLs from a playurl XML response.

    *xml* is the response text (a str). For every ``<durl>`` element in the
    document, the value of the first child node of its first ``<url>``
    element is collected.

    Returns the collected values as a list, in document order; the list is
    empty when the document contains no ``<durl>`` element.
    """
    from xml.dom.minidom import parseString

    # minidom is given bytes, so the text is encoded as UTF-8 first.
    document = parseString(xml.encode('utf-8'))

    collected = []
    for durl_node in document.getElementsByTagName('durl'):
        url_node = durl_node.getElementsByTagName('url')[0]
        collected.append(url_node.firstChild.nodeValue)
    return collected
|
|
|
|
|
|
|
|
def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_only = False):
    """Report and optionally download the video identified by a bilibili cid.

    The playurl interface is queried for *id* (concatenated into the URL, so
    it must be a str) and the media URLs are taken from the XML response.
    The size of every part is summed and printed together with *title*;
    unless *info_only* is true, the parts are then downloaded as 'flv'.

    Raises AssertionError when the first media URL does not contain a
    ``.flv`` or ``.hlv`` extension.
    """
    playurl_xml = get_html('http://interface.bilibili.tv/playurl?cid=' + id, 'utf-8')
    urls = parse_cid_playurl(playurl_xml)

    first_url = urls[0]
    assert re.search(r'\.(flv|hlv)\b', first_url), first_url

    # Only the third item returned by url_info (the size) is needed here.
    size = 0
    for part_url in urls:
        _, _, part_size = url_info(part_url)
        size += part_size

    print_info(site_info, title, 'flv', size)
    if info_only:
        return

    # NOTE(review): total_size is passed as None although size was just
    # computed above - presumably deliberate; confirm against download_urls.
    download_urls(urls, title, 'flv', total_size = None, output_dir = output_dir, merge = merge)
|
|
|
|
|
2012-09-02 00:02:14 +04:00
|
|
|
def bilibili_download(url, output_dir = '.', merge = True, info_only = False):
    """Download the video behind a bilibili page URL, plus its comment XML.

    *url* must start with ``http://www.bilibili.tv/video/av<digits>`` or
    ``http://bilibili.kankanews.com/video/av<digits>``. The page title is
    read from the first ``<h2>`` element and turned into a file-system-safe
    name. The page's flashvars (``<type>=<id>``) decide which backend does
    the actual download:

    - ``cid``  -> bilibili_download_by_cid
    - ``vid``  -> sina_download_by_id
    - ``ykid`` -> youku_download_by_id
    - ``uid``  -> tudou_download_by_id

    Unless *info_only* is true, the comment XML for the id is then saved as
    ``<title>.cmt.xml`` inside *output_dir*.

    Raises AssertionError when the URL is not recognised or no flashvars are
    found, and NotImplementedError for an unknown flashvars type.
    """
    import os

    # Dots in the host names are escaped so they only match a literal '.';
    # unescaped, hosts such as 'wwwxbilibiliytv' were accepted as well.
    assert re.match(r'http://(www\.bilibili\.tv|bilibili\.kankanews\.com)/video/av(\d+)', url)
    html = get_html(url)

    title = r1(r'<h2>([^<>]+)</h2>', html)
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)"'], html)
    assert flashvars
    # Split on the first '=' only, so the id part may itself contain '='.
    kind, ident = flashvars.split('=', 1)
    if kind == 'cid':
        bilibili_download_by_cid(ident, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif kind == 'vid':
        sina_download_by_id(ident, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif kind == 'ykid':
        youku_download_by_id(ident, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif kind == 'uid':
        tudou_download_by_id(ident, title, output_dir = output_dir, merge = merge, info_only = info_only)
    else:
        raise NotImplementedError(flashvars)

    if not info_only:
        comment_name = title + '.cmt.xml'
        print('Downloading %s ...' % comment_name)
        xml = get_srt_xml(ident)
        # Write next to the downloaded video rather than into the current
        # working directory, and pin the encoding so the result does not
        # depend on the locale (assumes get_srt_xml returns str - TODO confirm).
        with open(os.path.join(output_dir, comment_name), 'w', encoding='utf-8') as x:
            x.write(xml)
|
2012-09-02 00:02:14 +04:00
|
|
|
|
|
|
|
# Site name handed to print_info() when a video's details are reported.
site_info = "bilibili.tv"

# Generic entry-point names for this module: `download` is an alias of
# bilibili_download, and `download_playlist` is whatever
# playlist_not_supported('bilibili') returns (presumably a stub that
# reports playlists as unsupported - confirm in ..common).
download = bilibili_download
download_playlist = playlist_not_supported('bilibili')
|