[sina]rewrite; support sina.com.cn/zxt

This commit is contained in:
MaxwellGoblin 2017-08-08 13:46:15 +08:00
parent 6f97cda395
commit a020c0fe32

View File

@ -3,45 +3,50 @@
__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']
from ..common import * from ..common import *
from ..util.log import *
from hashlib import md5 from hashlib import md5
from random import randint from random import randint
from time import time from time import time
from xml.dom.minidom import parseString
import urllib.parse
def api_req(vid):
    """Fetch the playback-info XML for a Sina video id.

    Builds a signed request to ``ask.ivideo.sina.com.cn/v_play.php`` and
    returns whatever ``get_content`` yields for it.

    vid -- the video id; coerced to ``str`` so that numeric ids (for
           example ones decoded from JSON) can be signed as well.
    """
    vid = str(vid)
    rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000))
    # Current Unix time with its six lowest bits dropped, i.e. the same
    # value as re-parsing the binary representation minus its last 6 digits.
    t = str(int(time()) >> 6)
    # Signature: first 16 hex digits of md5(vid + salt + t + rand), then t.
    digest = md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()
    k = digest[:16] + t
    url = 'http://ask.ivideo.sina.com.cn/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, k)
    return get_content(url, headers=fake_headers)
def _first_text(parent, tag):
    """Return the stripped text of the first <tag> under parent, or ''."""
    nodes = parent.getElementsByTagName(tag)
    if not nodes:
        return ''
    # Join every text/CDATA child so that pretty-printed XML (whitespace
    # text nodes around a CDATA section) still yields the actual value.
    pieces = [
        child.data
        for child in nodes[0].childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    ]
    return ''.join(pieces).strip()


def video_info(xml):
    """Parse the playback-info XML document.

    Returns a tuple ``(urls, vname, size)``:
      urls  -- list of segment URLs, one per <durl> that carries a <url>
      vname -- text of <vname> ('' when absent or empty)
      size  -- sum of the <filesize> values of those segments, as int

    When <result> is 'error', or the document has no <video> element,
    returns ``(None, message, None)`` where message describes the failure.
    """
    videos = parseString(xml).getElementsByTagName('video')
    if not videos:
        return None, 'No video element in API response', None
    video = videos[0]

    if _first_text(video, 'result') == 'error':
        return None, _first_text(video, 'message'), None

    vname = _first_text(video, 'vname')

    urls = []
    size = 0
    for durl in video.getElementsByTagName('durl'):
        url = _first_text(durl, 'url')
        if not url:
            # A segment without a URL cannot be downloaded; skip it.
            continue
        urls.append(url)
        seg_size = _first_text(durl, 'filesize')
        if seg_size:
            size += int(seg_size)
    return urls, vname, size
def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a Sina video by its unique vid.
    http://video.sina.com.cn/

    vid        -- video id passed to api_req
    title      -- optional title; when omitted the name reported by the
                  API is used
    output_dir -- forwarded to download_urls
    merge      -- forwarded to download_urls
    info_only  -- when true, only print the video info and do not download
    """
    urls, name, size = video_info(api_req(vid))
    if urls is None:
        # In the error case video_info puts the API message in the name slot.
        log.wtf(name)
        return

    # Honour a caller-supplied title instead of silently discarding it.
    title = title or name
    print_info(site_info, title, 'flv', size)
    if not info_only:
        download_urls(urls, title, 'flv', size, output_dir=output_dir, merge=merge)
@ -58,9 +63,40 @@ def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_onl
if not info_only: if not info_only:
download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)
def sina_zxt(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download a video from a news.sina.com.cn/zxt page.

    The video is identified by the fragment part of ``url``; it is looked
    up through the s.video.sina.com.cn play API, the largest listed
    variant is chosen, and its segments are resolved via api_req /
    video_info.

    url        -- page URL whose fragment holds the video id
    output_dir -- forwarded to download_urls
    merge      -- forwarded to download_urls
    info_only  -- when true, only print the video info and do not download
    kwargs     -- forwarded to download_urls
    """
    ep = 'http://s.video.sina.com.cn/video/play?video_id='
    frag = urllib.parse.urlparse(url).fragment
    if not frag:
        log.wtf('No video specified with fragment')
        return

    meta = json.loads(get_content(ep + frag))
    if meta['code'] != 1:
        # Yes they use 1 for success.
        log.wtf(meta['message'])
        return

    title = meta['data']['title']
    videos = meta['data']['videos']
    # Check before sorting so that a missing/empty list is reported
    # instead of failing inside sorted().
    if not videos:
        log.wtf('No video file returned by API server')
        return

    # Ascending by size; the last entry is the largest variant.
    best = sorted(videos, key=lambda i: int(i['size']))[-1]
    vid = best['file_id']
    container = best['type']
    size = int(best['size'])
    if container == 'hlv':
        container = 'flv'

    urls, message, _ = video_info(api_req(vid))
    if urls is None:
        # Same handling as sina_download_by_vid: on failure video_info
        # returns the API message in the second slot.
        log.wtf(message)
        return

    print_info(site_info, title, container, size)
    if not info_only:
        download_urls(urls, title, container, size, output_dir=output_dir, merge=merge, **kwargs)
    return
def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
"""Downloads Sina videos by URL. """Downloads Sina videos by URL.
""" """
if 'news.sina.com.cn/zxt' in url:
sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
return
vid = match1(url, r'vid=(\d+)') vid = match1(url, r'vid=(\d+)')
if vid is None: if vid is None:
@ -73,8 +109,8 @@ def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if vid is None: if vid is None:
vid = match1(video_page, r'vid:"?(\d+)"?') vid = match1(video_page, r'vid:"?(\d+)"?')
if vid: if vid:
title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') #title = match1(video_page, r'title\s*:\s*\'([^\']+)\'')
sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
else: else:
vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"') vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
if vkey is None: if vkey is None: