mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 13:35:16 +03:00
[QQ] more robust url extractor
This commit is contained in:
parent
71ec9cad22
commit
fd2a290ffe
@ -4,7 +4,7 @@ __all__ = ['qq_download']
|
|||||||
|
|
||||||
from ..common import *
|
from ..common import *
|
||||||
from .qie import download as qieDownload
|
from .qie import download as qieDownload
|
||||||
|
from urllib.parse import urlparse,parse_qs
|
||||||
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
|
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
|
||||||
api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
|
api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
|
||||||
content = get_html(api)
|
content = get_html(api)
|
||||||
@ -24,31 +24,35 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
|
|||||||
download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
|
download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
|
||||||
|
|
||||||
def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
|
if 'live.qq.com' in url:
|
||||||
|
qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
|
||||||
|
return
|
||||||
|
|
||||||
|
#do redirect
|
||||||
if 'v.qq.com/page' in url:
|
if 'v.qq.com/page' in url:
|
||||||
# for URLs like this:
|
# for URLs like this:
|
||||||
# http://v.qq.com/page/k/9/7/k0194pwgw97.html
|
# http://v.qq.com/page/k/9/7/k0194pwgw97.html
|
||||||
# it will redirect.
|
content = get_html(url)
|
||||||
vid = match1(url, r'\b(\w+).html')
|
url = match1(content,r'window\.location\.href="(.*?)"')
|
||||||
title = vid
|
|
||||||
elif 'kuaibao.qq.com' in url:
|
if 'kuaibao.qq.com' in url:
|
||||||
content = get_html(url)
|
content = get_html(url)
|
||||||
vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
|
vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
|
||||||
title = match1(content, r'title">([^"]+)</p>')
|
title = match1(content, r'title">([^"]+)</p>')
|
||||||
title = title.strip() if title else vid
|
title = title.strip() if title else vid
|
||||||
elif 'live.qq.com' in url:
|
|
||||||
qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
|
|
||||||
exit()
|
|
||||||
elif 'iframe/player.html' in url:
|
elif 'iframe/player.html' in url:
|
||||||
vid = match1(url, r'\bvid=(\w+)')
|
vid = match1(url, r'\bvid=(\w+)')
|
||||||
# for embedded URLs; don't know what the title is
|
# for embedded URLs; don't know what the title is
|
||||||
title = vid
|
title = vid
|
||||||
else:
|
else:
|
||||||
content = get_html(url)
|
content = get_html(url)
|
||||||
vid = match1(content, r'vid\s*:\s*"\s*([^"]+)"')
|
vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
|
||||||
title = match1(content, r'title\s*:\s*"\s*([^"]+)"')
|
vid = vid[0] if vid else match1(content, r'vid\s*:\s*"\s*([^"]+)"') #general fallback
|
||||||
# try to get the right title for URLs like this:
|
title = match1(content,r'<a.*?id\s*=\s*"%s".*?title\s*=\s*"(.+?)".*?>'%vid)
|
||||||
# http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
|
title = match1(content, r'title">([^"]+)</p>') if not title else title
|
||||||
title = matchall(content, [r'title\s*:\s*"\s*([^"]+)"'])[-1]
|
title = vid if not title else title #general fallback
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
qq_download_by_vid(vid, title, output_dir, merge, info_only)
|
qq_download_by_vid(vid, title, output_dir, merge, info_only)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user