diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 55293533..d355eabd 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -11,12 +11,14 @@ from .youku import youku_download_by_vid
import hashlib
import re
-appkey='f3bb208b3d081dc8'
+appkey = 'f3bb208b3d081dc8'
+
def get_srt_xml(id):
    """Fetch the comment XML document that belongs to *id*.

    The id is substituted into the ``comment.bilibili.com`` URL template and
    the resulting URL is passed to ``get_html``; whatever ``get_html``
    returns is handed back to the caller unchanged.
    """
    # The parameter keeps its original name ``id`` so existing callers that
    # pass it by keyword continue to work.
    return get_html('http://comment.bilibili.com/%s.xml' % id)
+
def parse_srt_p(p):
fields = p.split(',')
assert len(fields) == 8, fields
@@ -44,12 +46,14 @@ def parse_srt_p(p):
return pool, mode, font_size, font_color
+
def parse_srt_xml(xml):
    """Scan a comment XML document for ``<d p="...">text</d>`` entries.

    This function is an unfinished stub: it validates the ``p`` attribute of
    every entry through ``parse_srt_p`` and then always raises, because no
    conversion of the parsed entries has been implemented yet.

    Args:
        xml: The comment XML document as a string.

    Raises:
        NotImplementedError: always, after all entries have been scanned.
    """
    # The pattern needs exactly two capture groups (the ``p`` attribute and
    # the comment text). With a single group ``re.findall`` returns plain
    # strings, and unpacking each result into two names fails with ValueError
    # before the intended NotImplementedError is ever reached.
    entries = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
    for attributes, _text in entries:
        # The parsed fields are not used yet; the call is kept so that
        # malformed attributes are still rejected by parse_srt_p.
        parse_srt_p(attributes)
    raise NotImplementedError()
+
def parse_cid_playurl(xml):
from xml.dom.minidom import parseString
try:
@@ -59,14 +63,15 @@ def parse_cid_playurl(xml):
except:
return []
+
def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
urls = []
for cid in cids:
url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid
urls += [i
- if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
- else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
- for i in parse_cid_playurl(get_content(url))]
+ if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
+ else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
+ for i in parse_cid_playurl(get_content(url))]
type_ = ''
size = 0
@@ -78,6 +83,7 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only
if not info_only:
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
+
def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid
urls = [i
@@ -98,6 +104,7 @@ def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=F
if not info_only:
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
+
def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
api_url = 'http://live.bilibili.com/api/playurl?cid=' + cid
urls = parse_cid_playurl(get_content(api_url))
@@ -109,6 +116,7 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o
if not info_only:
download_urls([url], title, type_, total_size=None, output_dir=output_dir, merge=merge)
+
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url)
@@ -118,9 +126,10 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
title = unescape_html(title)
title = escape_file_path(title)
- flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+ flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
+ r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
assert flashvars
- flashvars = flashvars.replace(': ','=')
+ flashvars = flashvars.replace(': ', '=')
t, cid = flashvars.split('=', 1)
cid = cid.split('&')[0]
if t == 'cid':
@@ -133,7 +142,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
cids = []
pages = re.findall('', html)
- for page in pages:
+ for i, page in enumerate(pages):
html = get_html("http://www.bilibili.com%s" % page)
flashvars = r1_of([r'(cid=\d+)',
r'flashvars="([^"]+)"',
@@ -141,6 +150,10 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
if flashvars:
t, cid = flashvars.split('=', 1)
cids.append(cid.split('&')[0])
+ if url.endswith(page):
+ cids = [cid.split('&')[0]]
+ titles = [titles[i]]
+ break
# no multi-P
if not pages:
@@ -173,6 +186,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
x.write(xml)
+
# Module-level registration names for this extractor.
# NOTE(review): presumably these are looked up by name by the code that
# dispatches URLs to extractor modules — confirm against the loader.
# Site name string for this extractor.
site_info = "bilibili.com"
# Single-video entry point.
download = bilibili_download
# Playlist entry point; bound to the same function as ``download``.
download_playlist = bilibili_download