From a020c0fe32be6940a6602b0d631b798af294c62c Mon Sep 17 00:00:00 2001 From: MaxwellGoblin Date: Tue, 8 Aug 2017 13:46:15 +0800 Subject: [PATCH] [sina]rewrite; support sina.com.cn/zxt --- src/you_get/extractors/sina.py | 90 ++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 27 deletions(-) diff --git a/src/you_get/extractors/sina.py b/src/you_get/extractors/sina.py index 121c5e13..bb94d8e3 100644 --- a/src/you_get/extractors/sina.py +++ b/src/you_get/extractors/sina.py @@ -3,45 +3,50 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] from ..common import * +from ..util.log import * from hashlib import md5 from random import randint from time import time +from xml.dom.minidom import parseString +import urllib.parse -def get_k(vid, rand): - t = str(int('{0:b}'.format(int(time()))[:-6], 2)) - return md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()[:16] + t - -def video_info_xml(vid): +def api_req(vid): rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000)) - url = 'http://ask.ivideo.sina.com.cn/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, get_k(vid, rand)) - xml = get_content(url, headers=fake_headers, decoded=True) + t = str(int('{0:b}'.format(int(time()))[:-6], 2)) + k = md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()[:16] + t + url = 'http://ask.ivideo.sina.com.cn/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, k) + xml = get_content(url, headers=fake_headers) return xml def video_info(xml): - urls = re.findall(r'(?:)?', xml) - name = match1(xml, r'(?:)?') - vstr = match1(xml, r'(?:)?') - return urls, name, vstr + video = parseString(xml).getElementsByTagName('video')[0] + result = video.getElementsByTagName('result')[0] + if result.firstChild.nodeValue == 'error': + message = video.getElementsByTagName('message')[0] + return None, message.firstChild.nodeValue, None + vname = video.getElementsByTagName('vname')[0].firstChild.nodeValue + durls = video.getElementsByTagName('durl') + + urls = [] + size = 0 + for durl in durls: + url = durl.getElementsByTagName('url')[0].firstChild.nodeValue + seg_size = durl.getElementsByTagName('filesize')[0].firstChild.nodeValue + urls.append(url) + size += int(seg_size) + + return urls, vname, size def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): """Downloads a Sina video by its unique vid. http://video.sina.com.cn/ """ - - xml = video_info_xml(vid) - sina_download_by_xml(xml, title, output_dir, merge, info_only) - - -def sina_download_by_xml(xml, title, output_dir, merge, info_only): - urls, name, vstr = video_info(xml) - title = title or name - assert title - size = 0 - for url in urls: - _, _, temp = url_info(url) - size += temp - + xml = api_req(vid) + urls, name, size = video_info(xml) + if urls is None: + log.wtf(name) + title = name print_info(site_info, title, 'flv', size) if not info_only: download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) @@ -58,9 +63,40 @@ def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_onl if not info_only: download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) +def sina_zxt(url, output_dir='.', merge=True, info_only=False, **kwargs): + ep = 'http://s.video.sina.com.cn/video/play?video_id=' + frag = urllib.parse.urlparse(url).fragment + if not frag: + log.wtf('No video specified with fragment') + meta = json.loads(get_content(ep + frag)) + if meta['code'] != 1: +# Yes they use 1 for success. + log.wtf(meta['message']) + title = meta['data']['title'] + videos = sorted(meta['data']['videos'], key = lambda i: int(i['size'])) + + if len(videos) == 0: + log.wtf('No video file returned by API server') + + vid = videos[-1]['file_id'] + container = videos[-1]['type'] + size = int(videos[-1]['size']) + + if container == 'hlv': + container = 'flv' + + urls, _, _ = video_info(api_req(vid)) + print_info(site_info, title, container, size) + if not info_only: + download_urls(urls, title, container, size, output_dir=output_dir, merge=merge, **kwargs) + return + def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """Downloads Sina videos by URL. """ + if 'news.sina.com.cn/zxt' in url: + sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) + return vid = match1(url, r'vid=(\d+)') if vid is None: @@ -73,8 +109,8 @@ def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if vid is None: vid = match1(video_page, r'vid:"?(\d+)"?') if vid: - title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') - sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + #title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') + sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) else: vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"') if vkey is None: