Mirror of https://github.com/soimort/you-get.git (synced 2025-02-03 00:33:58 +03:00)
[universal] update
This commit is contained in:
Parent: ad08f82a1a
Commit: b746ac01c9
@ -33,27 +33,35 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
|
|
||||||
meta_videos = re.findall(r'<meta property="og:video:url" content="([^"]*)"', page)
|
meta_videos = re.findall(r'<meta property="og:video:url" content="([^"]*)"', page)
|
||||||
if meta_videos:
|
if meta_videos:
|
||||||
for meta_video in meta_videos:
|
try:
|
||||||
meta_video_url = unescape_html(meta_video)
|
for meta_video in meta_videos:
|
||||||
type_, ext, size = url_info(meta_video_url)
|
meta_video_url = unescape_html(meta_video)
|
||||||
print_info(site_info, page_title, type_, size)
|
type_, ext, size = url_info(meta_video_url)
|
||||||
if not info_only:
|
print_info(site_info, page_title, type_, size)
|
||||||
download_urls([meta_video_url], page_title,
|
if not info_only:
|
||||||
ext, size,
|
download_urls([meta_video_url], page_title,
|
||||||
output_dir=output_dir, merge=merge,
|
ext, size,
|
||||||
faker=True)
|
output_dir=output_dir, merge=merge,
|
||||||
return
|
faker=True)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
|
||||||
hls_urls = re.findall(r'(https?://[^;"\'\\]+' + '\.m3u8?' +
|
hls_urls = re.findall(r'(https?://[^;"\'\\]+' + '\.m3u8?' +
|
||||||
r'[^;"\'\\]*)', page)
|
r'[^;"\'\\]*)', page)
|
||||||
if hls_urls:
|
if hls_urls:
|
||||||
for hls_url in hls_urls:
|
try:
|
||||||
type_, ext, size = url_info(hls_url)
|
for hls_url in hls_urls:
|
||||||
print_info(site_info, page_title, type_, size)
|
type_, ext, size = url_info(hls_url)
|
||||||
if not info_only:
|
print_info(site_info, page_title, type_, size)
|
||||||
download_url_ffmpeg(url=hls_url, title=page_title,
|
if not info_only:
|
||||||
ext='mp4', output_dir=output_dir)
|
download_url_ffmpeg(url=hls_url, title=page_title,
|
||||||
return
|
ext='mp4', output_dir=output_dir)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
|
||||||
# most common media file extensions on the Internet
|
# most common media file extensions on the Internet
|
||||||
media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm',
|
media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm',
|
||||||
@ -67,12 +75,12 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
for i in media_exts:
|
for i in media_exts:
|
||||||
urls += re.findall(r'(https?://[^ ;&"\'\\]+' + i + r'[^ ;&"\'\\]*)', page)
|
urls += re.findall(r'(https?://[^ ;&"\'\\<>]+' + i + r'[^ ;&"\'\\<>]*)', page)
|
||||||
|
|
||||||
p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page)
|
p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page)
|
||||||
urls += [parse.unquote(url) for url in p_urls]
|
urls += [parse.unquote(url) for url in p_urls]
|
||||||
|
|
||||||
q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\']+' + i + r'[^ ;"\']*)', page)
|
q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\'<>]+' + i + r'[^ ;"\'<>]*)', page)
|
||||||
urls += [url.replace('\\\\/', '/') for url in q_urls]
|
urls += [url.replace('\\\\/', '/') for url in q_urls]
|
||||||
|
|
||||||
# a link href to an image is often an interesting one
|
# a link href to an image is often an interesting one
|
||||||
|
Loading…
Reference in New Issue
Block a user