From 136f16445258d62ed62a0e013ecd2ed6c7969467 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Mar 2016 21:37:21 +0200 Subject: [PATCH 1/2] [bilibili] download multi parts by default --- src/you_get/extractors/bilibili.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index cd918602..1a6ca325 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -127,7 +127,8 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs if re.match(r'https?://live\.bilibili\.com/', url): title = r1(r'([^<>]+)', html) bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif 'playlist' in kwargs and kwargs['playlist']: + + else: # multi-P cids = [] pages = re.findall('', html) or title] + for i in range(len(cids)): bilibili_download_by_cid(cids[i], titles[i], output_dir=output_dir, merge=merge, info_only=info_only) - else: - title = r1(r'', html) or title - bilibili_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'vid': sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) @@ -169,14 +173,6 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: x.write(xml) -def bilibili_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs): - bilibili_download(url, - output_dir=output_dir, - merge=merge, - info_only=info_only, - playlist=True, - **kwargs) - site_info = "bilibili.com" download = bilibili_download -download_playlist = bilibili_download_playlist +download_playlist = bilibili_download From 6b9e2978908fa1f55866b4c65d619756adc08cfe Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 31 Mar 2016 17:42:00 +0200 Subject: [PATCH 2/2] [embed] support netease, close #1001 --- src/you_get/extractors/embed.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index b594b9f4..fd463c92 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -4,6 +4,7 @@ from ..common import * from .iqiyi import iqiyi_download_by_vid from .le import letvcloud_download_by_vu +from .netease import netease_download from .qq import qq_download_by_vid from .sina import sina_download_by_vid from .tudou import tudou_download_by_id @@ -36,10 +37,13 @@ yinyuetai_embed_patterns = [ 'player\.yinyuetai\.com/video/swf/(\d+)' ] iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ] +netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] + def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): - content = get_content(url) + content = get_content(url, headers=fake_headers) found = False title = match1(content, '([^<>]+)') + vids = matchall(content, youku_embed_patterns) for vid in set(vids): found = True @@ -60,6 +64,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa found = True iqiyi_download_by_vid((vid[1], vid[0]), title=title, output_dir=output_dir, merge=merge, info_only=info_only) + urls = matchall(content, netease_embed_patterns) + for url in urls: + found = True + netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + if not found: raise NotImplementedError(url)