From fbf5c491aa8f20263c5fa0c36f7bb01c468f8c91 Mon Sep 17 00:00:00 2001 From: chinat Date: Wed, 27 Jul 2016 13:42:06 +0800 Subject: [PATCH] [bilibili] video title may have space around --- src/you_get/extractors/bilibili.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 24782598..5c010a80 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -125,8 +125,8 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs url = r1(r'"([^"]+)" class="v-av-link"', html) html = get_content(url) - title = r1_of([r'', - r']*>([^<>]+)'], html) + title = r1_of([r'', + r']*>\s*([^<>]+)\s*'], html) if title: title = unescape_html(title) title = escape_file_path(title) @@ -139,14 +139,14 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs cid = cid.split('&')[0] if t == 'cid': if re.match(r'https?://live\.bilibili\.com/', url): - title = r1(r'([^<>]+)', html) + title = r1(r'\s*([^<>]+)\s*', html) bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) else: # multi-P cids = [] pages = re.findall('', html) + titles = re.findall('', html) for i, page in enumerate(pages): html = get_html("http://www.bilibili.com%s" % page) flashvars = r1_of([r'(cid=\d+)', @@ -163,7 +163,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs # no multi-P if not pages: cids = [cid] - titles = [r1(r'', html) or title] + titles = [r1(r'', html) or title] for i in range(len(cids)): bilibili_download_by_cid(cids[i],