From 80ea797cb688a8022715485255239b9b51d1dd04 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Tue, 15 Mar 2016 05:11:34 +0100
Subject: [PATCH] [bilibili] add support for live.bilibili.com (close #986)

---
 src/you_get/extractors/bilibili.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 7522a6e5..cd918602 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -98,15 +98,25 @@ def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=F
     if not info_only:
         download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
 
+def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
+    api_url = 'http://live.bilibili.com/api/playurl?cid=' + cid
+    urls = parse_cid_playurl(get_content(api_url))
+
+    for url in urls:
+        _, type_, _ = url_info(url)
+        size = 0
+        print_info(site_info, title, type_, size)
+        if not info_only:
+            download_urls([url], title, type_, total_size=None, output_dir=output_dir, merge=merge)
+
 def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_content(url)
 
     title = r1_of([r'',
                    r']*>([^<>]+)'], html)
-    if not title:
-        log.wtf('[Failed] Video does not exist. Try to login with --cookies.')
-    title = unescape_html(title)
-    title = escape_file_path(title)
+    if title:
+        title = unescape_html(title)
+        title = escape_file_path(title)
 
     flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
     assert flashvars
@@ -114,7 +124,10 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     t, cid = flashvars.split('=', 1)
     cid = cid.split('&')[0]
     if t == 'cid':
-        if 'playlist' in kwargs and kwargs['playlist']:
+        if re.match(r'https?://live\.bilibili\.com/', url):
+            title = r1(r'([^<>]+)', html)
+            bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
+        elif 'playlist' in kwargs and kwargs['playlist']:
             # multi-P
             cids = []
             pages = re.findall('