Merge branch 'qq-robust-url' of https://github.com/jackyzy823/you-get into jackyzy823-qq-robust-url

This commit is contained in:
Mort Yao 2016-05-28 18:46:53 +02:00
commit cb39f4b9f3
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251

View File

@ -4,7 +4,7 @@ __all__ = ['qq_download']
from ..common import * from ..common import *
from .qie import download as qieDownload from .qie import download as qieDownload
from urllib.parse import urlparse,parse_qs
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
content = get_html(api) content = get_html(api)
@ -24,31 +24,35 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Work out the video id and title for a QQ URL, then download it.

    The URL is dispatched on substrings:

    * ``live.qq.com``        -- handed to ``qieDownload``; nothing else runs.
    * ``v.qq.com/page``      -- the page is fetched and the URL is replaced by
      the ``window.location.href`` target found in it, then dispatch continues
      with the new URL.
    * ``kuaibao.qq.com``     -- vid and title are scraped from the page.
    * ``iframe/player.html`` -- vid comes from the URL; the title is the vid.
    * anything else          -- vid comes from the ``vid`` query parameter if
      present, otherwise from the page; the title is looked up in the page
      and falls back to the vid.

    Args:
        url: Page URL to resolve.
        output_dir: Passed through to the downloader.
        merge: Passed through to the downloader.
        info_only: Passed through to the downloader.
        **kwargs: Accepted and ignored.

    Returns:
        None. The work is done by ``qieDownload`` or ``qq_download_by_vid``.
    """
    # Function-scope import: only urllib.parse and the star-import from
    # ..common are visibly in scope at module level.
    import re

    if 'live.qq.com' in url:
        qieDownload(url, output_dir=output_dir, merge=merge, info_only=info_only)
        return

    # Follow the JavaScript redirect used by URLs such as
    # http://v.qq.com/page/k/9/7/k0194pwgw97.html
    if 'v.qq.com/page' in url:
        content = get_html(url)
        redirect_target = match1(content, r'window\.location\.href="(.*?)"')
        # Keep the original URL when no redirect is found; overwriting it
        # with a missing match would break the substring tests below.
        # NOTE(review): assumes match1 returns a falsy value on no match.
        if redirect_target:
            url = redirect_target

    if 'kuaibao.qq.com' in url:
        content = get_html(url)
        vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
        title = match1(content, r'title">([^"]+)</p>')
        title = title.strip() if title else vid
    elif 'iframe/player.html' in url:
        vid = match1(url, r'\bvid=(\w+)')
        # for embedded URLs; don't know what the title is
        title = vid
    else:
        content = get_html(url)
        # Links may name the vid explicitly, e.g.
        # http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
        query_vids = parse_qs(urlparse(url).query).get('vid')
        if query_vids:
            vid = query_vids[0]
        else:
            # general fallback: take the vid embedded in the page
            vid = match1(content, r'vid\s*:\s*"\s*([^"]+)"')

        title = None
        if vid:
            # Escape the vid so it is matched literally inside the pattern.
            title = match1(
                content,
                r'<a.*?id\s*=\s*"%s".*?title\s*=\s*"(.+?)".*?>' % re.escape(vid),
            )
        if not title:
            title = match1(content, r'title">([^"]+)</p>')
        if not title:
            # general fallback
            title = vid

    qq_download_by_vid(vid, title, output_dir, merge, info_only)