From c34c0af6a4bbc602cc48ab66a657a82da540b2d9 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 12 Apr 2020 01:02:51 +0200 Subject: [PATCH] [universal] support site-relative path --- src/you_get/extractors/universal.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 8b9a24c9..4c3d34e9 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -99,6 +99,14 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg for rel_url in rel_urls: urls += [ r1(r'(.*/)', url) + rel_url ] + # site-relative path + rel_urls = [] + rel_urls += re.findall(r'href="(/[^"]+\.jpe?g)"', page, re.I) + rel_urls += re.findall(r'href="(/[^"]+\.png)"', page, re.I) + rel_urls += re.findall(r'href="(/[^"]+\.gif)"', page, re.I) + for rel_url in rel_urls: + urls += [ r1(r'(https?://[^/]+)', url) + rel_url ] + # MPEG-DASH MPD mpd_urls = re.findall(r'src="(https?://[^"]+\.mpd)"', page) for mpd_url in mpd_urls: