you-get/src/you_get/extractors/qq.py

190 lines
7.7 KiB
Python
Raw Normal View History

2012-12-01 19:14:04 +04:00
#!/usr/bin/env python
# Names exported when this module is star-imported.
__all__ = ['qq_download']
from ..common import *
2017-06-16 20:46:31 +03:00
from ..util.log import *
2016-05-20 11:28:30 +03:00
from .qie import download as qieDownload
2017-08-07 22:38:34 +03:00
from .qie_video import download_by_url as qie_video_download
2016-05-28 12:32:07 +03:00
from urllib.parse import urlparse,parse_qs
2016-08-04 04:39:10 +03:00
def qq_download_by_vid(vid, title, default_from, output_dir='.', merge=True, info_only=False):
    """Resolve the segment URLs of a QQ video by its vid, then report/download them.

    The function queries the ``getinfo`` API once, then the ``getkey`` API once
    per segment, collecting one URL per segment.

    Args:
        vid: Video id interpolated into the ``getinfo`` and ``getkey`` requests.
        title: Accepted for interface compatibility only; it is always
            overwritten by the ``ti`` field of the ``getinfo`` response.
        default_from: Truthy selects ``platform=11`` for ``getinfo``; falsy
            selects ``platform=4100201``.
        output_dir: Forwarded to ``download_urls``.
        merge: Forwarded to ``download_urls``.
        info_only: When true, only ``print_info`` is called; nothing is
            downloaded.

    Returns:
        None. Returns early, without printing or downloading, when no segment
        URL could be resolved.
    """
    platform = 11 if default_from else 4100201
    info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&vid={}'.format(platform, vid)
    info = get_content(info_api)
    # The payload has the form "QZOutputJson=<json>" followed by one trailing
    # character (presumably ';'), which [:-1] strips before parsing.
    video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])

    video = video_json['vl']['vi'][0]
    clip = video['cl']
    fn_pre = video['lnk']
    title = video['ti']
    host = video['ul']['ui'][0]['url']
    filename = video['fn']

    # fc == 0 is treated as a single, unsegmented file.
    fc_cnt = clip['fc']
    if fc_cnt == 0:
        seg_cnt = 1
    else:
        seg_cnt = fc_cnt
        # Segmented files are named "<prefix>.<magic>.<part>.<type>"; the
        # per-part name is rebuilt from these pieces inside the loop.
        fn_pre, magic_str, video_type = filename.split('.')

    part_urls = []
    total_size = 0
    ext = None
    for part in range(1, seg_cnt + 1):
        # fix some error cases("check vid&filename failed" and "format invalid")
        # https://v.qq.com/x/page/q06058th9ll.html
        # https://v.qq.com/x/page/t060789a21e.html
        if fc_cnt == 0:
            # fix json error
            # https://v.qq.com/x/page/w0674l9yrrh.html
            part_format_id = clip['keyid'].split('.')[-1]
        else:
            part_format_id = clip['ci'][part - 1]['keyid'].split('.')[1]
            filename = '.'.join([fn_pre, magic_str, str(part), video_type])

        key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename)
        part_info = get_content(key_api)
        key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])

        if key_json.get('key') is None:
            # No per-part key: fall back to the key from getinfo and the
            # plain "<prefix>.mp4" file name.
            vkey = video['fvkey']
            url = '{}{}?vkey={}'.format(host, fn_pre + '.mp4', vkey)
        else:
            vkey = key_json['key']
            url = '{}{}?vkey={}'.format(host, filename, vkey)

        if not vkey:
            # A failure on the very first part is reported via log.wtf,
            # later parts only via log.w; either way stop collecting parts.
            if part == 1:
                log.wtf(key_json['msg'])
            else:
                log.w(key_json['msg'])
            break
        if key_json.get('filename') is None:
            log.w(key_json['msg'])
            break

        part_urls.append(url)
        _, ext, size = url_info(url)
        total_size += size

    if not part_urls:
        # The loop stopped before resolving any part (the API message has
        # already been logged above). Without this guard `ext` would be
        # unbound and print_info() would fail with UnboundLocalError.
        return

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(part_urls, title, ext, total_size, output_dir=output_dir, merge=merge)
2012-12-01 19:14:04 +04:00
2017-03-11 10:35:14 +03:00
def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=False):
    """Download a kg.qq.com recording identified by its share id.

    Fetches the recording detail and the lyric data (both JSONP responses),
    prints the media info and, unless ``info_only`` is set, downloads the
    media file and optionally writes the lyrics next to it as ``<title>.lrc``.

    Args:
        shareid: Share id appended to the detail API query string; also used
            as the title when the lyrics carry no ``[ti:...]`` tag.
        output_dir: Directory for the media file and the optional .lrc file.
        info_only: When true, nothing is downloaded or written.
        caption: When true (and ``info_only`` is false), write the lyrics file.
    """
    BASE_URL = 'http://cgi.kg.qq.com/fcgi-bin/kg_ugc_getdetail'
    params_str = '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&v=4&outCharset=utf-8&shareid=' + shareid
    url = BASE_URL + params_str
    content = get_content(url)
    # Strip a prefix as long as "jsonpcallback(" and the last character
    # (presumably the closing ")") to get the bare JSON.
    json_str = content[len('jsonpcallback('):-1]
    detail = json.loads(json_str)['data']

    # Use the video URL only when playurl is empty.
    real_url = detail['playurl'] or detail['playurl_video']
    # Un-escape "\/" sequences left in the URL. Written as '\\/' because
    # '\/' is an invalid escape sequence (same runtime value).
    real_url = real_url.replace('\\/', '/')

    ksong_mid = detail['ksong_mid']
    lyric_url = 'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&outCharset=utf-8&ksongmid=' + ksong_mid
    lyric_data = get_content(lyric_url)
    lyric_string = lyric_data[len('jsopgetlrcdata('):-1]
    lyric = json.loads(lyric_string)['data']['lyric']

    # The title comes from the LRC "[ti:...]" tag, falling back to the share id.
    title = match1(lyric, r'\[ti:([^\]]*)\]')
    media_type, ext, size = url_info(real_url)
    if not title:
        title = shareid

    print_info('腾讯全民K歌', title, media_type, size)
    if info_only:
        return

    download_urls([real_url], title, ext, size, output_dir, merge=False)
    if caption:
        caption_path = output_dir + '/' + title + '.lrc'
        # Explicit UTF-8: the locale default encoding may be unable to
        # represent the lyric text and would raise UnicodeEncodeError.
        with open(caption_path, 'w', encoding='utf-8') as f:
            # Lyrics arrive with CRLF separators; write one line per entry.
            f.writelines(line + '\n' for line in lyric.split('\r\n'))
2016-08-04 04:39:10 +03:00
def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download the video(s) behind a qq.com URL.

    The URL is dispatched by substring/regex checks, in this order:

    * ``egame.qq.com/live?anchorid=N`` -> ``qq_egame.qq_egame_download``
    * ``kg.qq.com`` / ``kg2.qq.com`` -> ``kg_qq_download_by_shareid``
    * ``live.qq.com`` -> ``qie_video_download`` or ``qieDownload``
    * ``mp.weixin.qq.com/s`` -> every ``?vid=`` found in the page
    * anything else -> a single vid/title is extracted from the page or the
      URL and passed to ``qq_download_by_vid``

    Args:
        url: Page URL to download from.
        output_dir: Forwarded to the selected downloader.
        merge: Forwarded to the selected downloader where it accepts it.
        info_only: Forwarded to the selected downloader.
        **kwargs: Extra options; ``caption`` (default False) is read for
            kg.qq.com URLs, and the whole mapping is forwarded to the egame
            and qie video downloaders.
    """
    default_from = True

    if re.match(r'https?://egame.qq.com/live\?anchorid=(\d+)', url):
        from . import qq_egame
        qq_egame.qq_egame_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
        return

    if 'kg.qq.com' in url or 'kg2.qq.com' in url:
        shareid = url.split('?s=')[-1]
        # .get() so callers that pass no `caption` option do not hit KeyError.
        caption = kwargs.get('caption', False)
        kg_qq_download_by_shareid(shareid, output_dir=output_dir, info_only=info_only, caption=caption)
        return

    if 'live.qq.com' in url:
        if 'live.qq.com/video/v' in url:
            qie_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
        else:
            qieDownload(url, output_dir=output_dir, merge=merge, info_only=info_only)
        return

    if 'mp.weixin.qq.com/s' in url:
        content = get_content(url)
        vids = matchall(content, [r'\?vid=(\w+)'])
        # No title is extracted here, so each vid doubles as its title argument.
        for vid in vids:
            qq_download_by_vid(vid, vid, default_from, output_dir, merge, info_only)
        return

    if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url):
        content = get_content(url)
        vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
        title = match1(content, r'title">([^"]+)</p>')
        title = title.strip() if title else vid
    elif 'iframe/player.html' in url:
        vid = match1(url, r'\bvid=(\w+)')
        # for embedded URLs; don't know what the title is
        title = vid
    else:
        content = get_content(url)
        # 1) last path component of the canonical link, e.g.
        #    https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html
        rurl = match1(content, r'<link.*?rel\s*=\s*"canonical".*?href\s*="(.+?)".*?>')
        vid = ""
        if rurl:
            vid = rurl.split('/')[-1].split('.')[0]
            # "undefined" / "index" are rejected as vids:
            # https://v.qq.com/x/page/d0552xbadkl.html https://y.qq.com/n/yqq/mv/v/g00268vlkzy.html
            if vid in ("undefined", "index"):
                vid = ""
        # 2) last path component of the requested URL, e.g.
        #    https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html?
        if not vid:
            vid = url.split('/')[-1].split('.')[0]
        # 3) general fallbacks: a "vid" field, then any "id" field in the page
        if not vid:
            vid = match1(content, r'vid"*\s*:\s*"\s*([^"]+)"')
        if not vid:
            vid = match1(content, r'id"*\s*:\s*"(.+?)"')

        # The vid may come straight from the URL tail, so escape it before
        # embedding it in a pattern; str() keeps the old '%s' formatting
        # behaviour when vid is None.
        title = match1(content, r'<a.*?id\s*=\s*"%s".*?title\s*=\s*"(.+?)".*?>' % re.escape(str(vid)))
        if not title:
            title = match1(content, r'title">([^"]+)</p>')
        if not title:
            title = match1(content, r'"title":"([^"]+)"')
        if not title:
            title = vid  # general fallback

    if 'v.sports.qq.com' in url:
        # fix url forbidden
        # http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr
        default_from = False
    qq_download_by_vid(vid, title, default_from, output_dir, merge, info_only)
2012-12-01 19:14:04 +04:00
2013-01-11 07:43:30 +04:00
# Site label that qq_download_by_vid() passes to print_info().
site_info = "QQ.com"
2012-12-01 19:14:04 +04:00
# Module-level aliases under the generic names `download` / `download_playlist`.
# NOTE(review): presumably these are the names the extractor dispatcher looks
# up, and playlist_not_supported('qq') yields a stub handler — confirm in ..common.
download = qq_download
download_playlist = playlist_not_supported('qq')