diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 7243058d..faa628cd 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -89,9 +89,9 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only if not info_only: download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) -def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False): - sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest() - url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + id + '&sign=' + sign_this +def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False): + sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + cid + secretkey, 'utf-8')).hexdigest() + url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this urls = [i if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i) else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) @@ -110,46 +110,66 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) - title = r1_of([r'',r']*>([^<>]+)'], html) + title = r1_of([r'', + r']*>([^<>]+)'], html) + title = title.split('\r')[0] title = unescape_html(title) title = escape_file_path(title) flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars flashvars = flashvars.replace(': ','=') - t, id = flashvars.split('=', 1) - id = id.split('&')[0] + t, cid = flashvars.split('=', 1) + cid = cid.split('&')[0] if t == 'cid': - # Multi-P - cids = [id] - p = re.findall('', html) + for page in pages: + html = get_html("http://www.bilibili.com%s" % page) + flashvars = r1_of([r'(cid=\d+)', + r'flashvars="([^"]+)"', + r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) if flashvars: t, cid = flashvars.split('=', 1) cids.append(cid.split('&')[0]) - bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only) + for i in range(len(cids)): + bilibili_download_by_cid(cids[i], + titles[i], + output_dir=output_dir, + merge=merge, + info_only=info_only) + else: + title = r1(r'', html) or title + bilibili_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'vid': - sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'ykid': - youku_download_by_vid(id, title=title, output_dir = output_dir, merge = merge, info_only = info_only) + youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'uid': - tudou_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) else: raise NotImplementedError(flashvars) if not info_only and not dry_run: title = get_filename(title) print('Downloading %s ...\n' % (title + '.cmt.xml')) - xml = get_srt_xml(id) + xml = get_srt_xml(cid) with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: x.write(xml) +def bilibili_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs): + bilibili_download(url, + output_dir=output_dir, + merge=merge, + info_only=info_only, + playlist=True, + **kwargs) + site_info = "bilibili.com" download = bilibili_download -download_playlist = playlist_not_supported('bilibili') +download_playlist = bilibili_download_playlist