From b746ac01c9f39de94cac2d56f665285b0523b974 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 29 Apr 2019 00:29:28 +0200 Subject: [PATCH] [universal] update --- src/you_get/extractors/universal.py | 46 +++++++++++++++++------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 69ef5d90..8b9a24c9 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -33,27 +33,35 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg meta_videos = re.findall(r']+' + i + r'[^ ;&"\'\\<>]*)', page) p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page) urls += [parse.unquote(url) for url in p_urls] - q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\']+' + i + r'[^ ;"\']*)', page) + q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\'<>]+' + i + r'[^ ;"\'<>]*)', page) urls += [url.replace('\\\\/', '/') for url in q_urls] # a link href to an image is often an interesting one