diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 55293533..d355eabd 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -11,12 +11,14 @@ from .youku import youku_download_by_vid import hashlib import re -appkey='f3bb208b3d081dc8' +appkey = 'f3bb208b3d081dc8' + def get_srt_xml(id): url = 'http://comment.bilibili.com/%s.xml' % id return get_html(url) + def parse_srt_p(p): fields = p.split(',') assert len(fields) == 8, fields @@ -44,12 +46,14 @@ def parse_srt_p(p): return pool, mode, font_size, font_color + def parse_srt_xml(xml): d = re.findall(r'(.*)', xml) for x, y in d: p = parse_srt_p(x) raise NotImplementedError() + def parse_cid_playurl(xml): from xml.dom.minidom import parseString try: @@ -59,14 +63,15 @@ def parse_cid_playurl(xml): except: return [] + def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False): urls = [] for cid in cids: url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid urls += [i - if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i) - else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) - for i in parse_cid_playurl(get_content(url))] + if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i) + else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) + for i in parse_cid_playurl(get_content(url))] type_ = '' size = 0 @@ -78,6 +83,7 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only if not info_only: download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) + def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False): url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid urls = [i @@ -98,6 +104,7 @@ def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=F if not info_only: download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) + def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False): api_url = 'http://live.bilibili.com/api/playurl?cid=' + cid urls = parse_cid_playurl(get_content(api_url)) @@ -109,6 +116,7 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o if not info_only: download_urls([url], title, type_, total_size=None, output_dir=output_dir, merge=merge) + def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) @@ -118,9 +126,10 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', + r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars - flashvars = flashvars.replace(': ','=') + flashvars = flashvars.replace(': ', '=') t, cid = flashvars.split('=', 1) cid = cid.split('&')[0] if t == 'cid': @@ -133,7 +142,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs cids = [] pages = re.findall('', html) - for page in pages: + for i, page in enumerate(pages): html = get_html("http://www.bilibili.com%s" % page) flashvars = r1_of([r'(cid=\d+)', r'flashvars="([^"]+)"', @@ -141,6 +150,10 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs if flashvars: t, cid = flashvars.split('=', 1) cids.append(cid.split('&')[0]) + if url.endswith(page): + cids = [cid.split('&')[0]] + titles = [titles[i]] + break # no multi-P if not pages: @@ -173,6 +186,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: x.write(xml) + site_info = "bilibili.com" download = bilibili_download download_playlist = bilibili_download