you-get/src/you_get/extractors/sina.py

#!/usr/bin/env python

# Names exported when this module is star-imported.
__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']

from ..common import *
from ..util.log import *

from hashlib import md5
from random import randint
from time import time
from xml.dom.minidom import parseString
import urllib.parse

def api_req(vid):
    """Requests the playback-info document for a Sina video id.

    Builds the signed query string for the ``v_play.php`` endpoint and
    returns whatever ``get_content`` yields for that URL.
    """
    # Random "0.<digits>" token; sent as ``ran`` and mixed into the key.
    nonce = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000))
    # Current Unix time with its six lowest binary digits dropped.
    binary_now = '{0:b}'.format(int(time()))
    stamp = str(int(binary_now[:-6], 2))
    # Key: first 16 hex chars of md5(vid + fixed string + stamp + nonce),
    # followed by the stamp itself.
    digest = md5((vid + 'Z6prk18aWxP278cVAH' + stamp + nonce).encode('utf-8')).hexdigest()
    key = digest[:16] + stamp
    endpoint = 'http://ask.ivideo.sina.com.cn/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, nonce, key)
    return get_content(endpoint, headers=fake_headers)

def video_info(xml):
    """Parses the playback-info XML returned by the Sina API.

    Returns ``(urls, vname, size)``: ``urls`` lists every ``<durl>``
    segment URL in document order and ``size`` is the sum of the segments'
    ``<filesize>`` values.  When ``<result>`` reads ``error`` the function
    returns ``(None, message, None)`` with the text of ``<message>``.
    """
    def first_text(node, tag):
        # Value of the first child node of the first <tag> element under node.
        return node.getElementsByTagName(tag)[0].firstChild.nodeValue

    root = parseString(xml).getElementsByTagName('video')[0]
    if first_text(root, 'result') == 'error':
        return None, first_text(root, 'message'), None

    vname = first_text(root, 'vname')
    # One (url, byte count) pair per segment; url is read before filesize.
    segments = [
        (first_text(part, 'url'), int(first_text(part, 'filesize')))
        for part in root.getElementsByTagName('durl')
    ]
    links = [link for link, _ in segments]
    total = sum(length for _, length in segments)
    return links, vname, total

def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a Sina video by its unique vid.
    http://video.sina.com.cn/

    vid: video id string sent to the playback API.
    title: title to use for the download; when omitted (or empty) the
        name reported by the API is used instead.
    output_dir, merge: forwarded to download_urls().
    info_only: when true, only print the video info and skip the download.
    """
    xml = api_req(vid)
    urls, name, size = video_info(xml)
    if urls is None:
        # On failure video_info() returns the API's error message as `name`.
        log.wtf(name)
        # NOTE(review): log.wtf() presumably aborts the program; return
        # anyway so a None url list can never reach download_urls().
        return
    # Honour a caller-supplied title; fall back to the API's name otherwise.
    if not title:
        title = name
    print_info(site_info, title, 'flv', size)
    if not info_only:
        download_urls(urls, title, 'flv', size, output_dir=output_dir, merge=merge)

def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a Sina video by its unique vkey.
    http://video.sina.com/

    The vkey is substituted into a fixed FLV URL; only the size reported
    by url_info() for that URL is used.
    """
    flv_url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey
    # url_info() yields three values; the first two are not needed here.
    _, _, size = url_info(flv_url)

    print_info(site_info, title, 'flv', size)
    if info_only:
        return
    download_urls([flv_url], title, 'flv', size, output_dir=output_dir, merge=merge)

def sina_zxt(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Downloads a video from a news.sina.com.cn/zxt page.

    The video id is read from the URL fragment and looked up through the
    s.video.sina.com.cn play API.  Of the files listed in the response,
    the one with the largest ``size`` is chosen and its ``file_id`` is
    resolved to segment URLs via api_req() / video_info().

    url: page URL; must carry the video id as its fragment.
    output_dir, merge, **kwargs: forwarded to download_urls().
    info_only: when true, only print the video info and skip the download.
    """
    # NOTE(review): log.wtf() presumably aborts the program; the explicit
    # returns below keep the function safe even if it does not.
    ep = 'http://s.video.sina.com.cn/video/play?video_id='
    frag = urllib.parse.urlparse(url).fragment
    if not frag:
        log.wtf('No video specified with fragment')
        return
    meta = json.loads(get_content(ep + frag))
    if meta['code'] != 1:
        # Yes they use 1 for success.
        log.wtf(meta['message'])
        return
    title = meta['data']['title']
    # Ascending by size, so the last entry is the largest file.
    videos = sorted(meta['data']['videos'], key=lambda i: int(i['size']))

    if not videos:
        log.wtf('No video file returned by API server')
        return

    best = videos[-1]
    vid = best['file_id']
    container = best['type']
    size = int(best['size'])

    # 'hlv' files are reported and saved as 'flv'.
    if container == 'hlv':
        container = 'flv'

    urls, message, _ = video_info(api_req(vid))
    if urls is None:
        # video_info() signals an API error by returning None for the urls
        # and the server's message in the second slot.
        log.wtf(message)
        return
    print_info(site_info, title, container, size)
    if not info_only:
        download_urls(urls, title, container, size, output_dir=output_dir, merge=merge, **kwargs)

def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Downloads Sina videos by URL.

    Lookup order:
      1. news.sina.com.cn/zxt URLs are handed to sina_zxt().
      2. A ``vid=<digits>`` parameter in the URL.
      3. ``hd_vid`` in the page; if it is '0', the last entry of the
         '|'-separated ``vid`` list in the page.
      4. A ``vid:<digits>`` entry in the page.
      5. A ``vkey`` in the page (downloaded via sina_download_by_vkey()).
      6. A ``#<digits>`` fragment in the URL.

    output_dir, merge, info_only: forwarded to the chosen downloader.
    **kwargs: forwarded to sina_zxt() only.
    """
    if 'news.sina.com.cn/zxt' in url:
        sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
        return

    vid = match1(url, r'vid=(\d+)')
    if vid is None:
        # The page is only fetched when the URL itself carries no vid, so
        # video_page exists on every path below that reads it.
        video_page = get_content(url)
        vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
        if hd_vid == '0':
            vid_list = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'')
            # Without a vid list, reset to None so the later fallbacks run
            # instead of failing on None.split().
            vid = vid_list.split('|')[-1] if vid_list is not None else None

    if vid is None:
        vid = match1(video_page, r'vid:"?(\d+)"?')

    if vid:
        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
        return

    vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
    if vkey is not None:
        title = match1(video_page, r'title\s*:\s*"([^"]+)"')
        sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
        return

    # Last resort: a numeric fragment in the URL.
    vid = match1(url, r'#(\d+)')
    if vid is None:
        # Nothing identifies a video; report it rather than passing None on.
        log.wtf('Cannot find video id or vkey for %s' % url)
        return
    sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)

# Site label passed to print_info() by the download functions above.
site_info = "Sina.com"
# Module-level aliases; presumably the entry points looked up by the
# extractor dispatcher -- confirm against the caller.
download = sina_download
download_playlist = playlist_not_supported('sina')
add support for Sina 2012-09-01 23:03:05 +04:00			`#!/usr/bin/env python`

Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']`
add support for Sina 2012-09-01 23:03:05 +04:00
			`from ..common import *`
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`from ..util.log import *`
add support for Sina 2012-09-01 23:03:05 +04:00
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00			`from hashlib import md5`
			`from random import randint`
			`from time import time`
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`from xml.dom.minidom import parseString`
			`import urllib.parse`
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`def api_req(vid):`
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00			`rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000))`
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`t = str(int('{0:b}'.format(int(time()))[:-6], 2))`
			`k = md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()[:16] + t`
			`url = 'http://ask.ivideo.sina.com.cn/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, k)`
			`xml = get_content(url, headers=fake_headers)`
Fixed miomio download of sina video 2014-06-27 19:46:18 +04:00			`return xml`
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00
Fixed miomio download of sina video 2014-06-27 19:46:18 +04:00			`def video_info(xml):`
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`video = parseString(xml).getElementsByTagName('video')[0]`
			`result = video.getElementsByTagName('result')[0]`
			`if result.firstChild.nodeValue == 'error':`
			`message = video.getElementsByTagName('message')[0]`
			`return None, message.firstChild.nodeValue, None`
			`vname = video.getElementsByTagName('vname')[0].firstChild.nodeValue`
			`durls = video.getElementsByTagName('durl')`

			`urls = []`
			`size = 0`
			`for durl in durls:`
			`url = durl.getElementsByTagName('url')[0].firstChild.nodeValue`
			`seg_size = durl.getElementsByTagName('filesize')[0].firstChild.nodeValue`
			`urls.append(url)`
			`size += int(seg_size)`

			`return urls, vname, size`
add support for Sina 2012-09-01 23:03:05 +04:00
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):`
			`"""Downloads a Sina video by its unique vid.`
			`http://video.sina.com.cn/`
			`"""`
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`xml = api_req(vid)`
			`urls, name, size = video_info(xml)`
			`if urls is None:`
			`log.wtf(name)`
			`title = name`
add support for Sina 2012-09-01 23:03:05 +04:00			`print_info(site_info, title, 'flv', size)`
			`if not info_only:`
			`download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)`

Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False):`
			`"""Downloads a Sina video by its unique vkey.`
			`http://video.sina.com/`
			`"""`
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey`
			`type, ext, size = url_info(url)`
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`print_info(site_info, title, 'flv', size)`
			`if not info_only:`
			`download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)`

[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`def sina_zxt(url, output_dir='.', merge=True, info_only=False, **kwargs):`
			`ep = 'http://s.video.sina.com.cn/video/play?video_id='`
			`frag = urllib.parse.urlparse(url).fragment`
			`if not frag:`
			`log.wtf('No video specified with fragment')`
			`meta = json.loads(get_content(ep + frag))`
			`if meta['code'] != 1:`
			`# Yes they use 1 for success.`
			`log.wtf(meta['message'])`
			`title = meta['data']['title']`
			`videos = sorted(meta['data']['videos'], key = lambda i: int(i['size']))`

			`if len(videos) == 0:`
			`log.wtf('No video file returned by API server')`

			`vid = videos[-1]['file_id']`
			`container = videos[-1]['type']`
			`size = int(videos[-1]['size'])`

			`if container == 'hlv':`
			`container = 'flv'`

			`urls, _, _ = video_info(api_req(vid))`
			`print_info(site_info, title, container, size)`
			`if not info_only:`
			`download_urls(urls, title, container, size, output_dir=output_dir, merge=merge, **kwargs)`
			`return`

fix extractors not use VideoExtractor after add --json option 2015-09-26 08:45:39 +03:00			`def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):`
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`"""Downloads Sina videos by URL.`
			`"""`
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`if 'news.sina.com.cn/zxt' in url:`
			`sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)`
			`return`
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`vid = match1(url, r'vid=(\d+)')`
			`if vid is None:`
			`video_page = get_content(url)`
			`vid = hd_vid = match1(video_page, r'hd_vid\s:\s\'([^\']+)\'')`
			`if hd_vid == '0':`
			`vids = match1(video_page, r'[^\w]vid\s:\s\'([^\']+)\'').split('\|')`
			`vid = vids[-1]`
Sina: key algorithm #298 2014-02-18 05:04:15 +04:00
[sina] match vid from the page fix for http://video.sina.com.cn/view/249851143.html 2015-10-07 15:25:13 +03:00			`if vid is None:`
[sina] fix #1361, close #1332 2016-08-23 00:58:48 +03:00			`vid = match1(video_page, r'vid:"?(\d+)"?')`
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`if vid:`
[sina]rewrite; support sina.com.cn/zxt 2017-08-08 08:46:15 +03:00			`#title = match1(video_page, r'title\s:\s\'([^\']+)\'')`
			`sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)`
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`else:`
			`vkey = match1(video_page, r'vkey\s:\s"([^"]+)"')`
[sina]try to find vid from url 2017-05-17 22:50:07 +03:00			`if vkey is None:`
			`vid = match1(url, r'#(\d+)')`
			`sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)`
			`return`
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`title = match1(video_page, r'title\s:\s"([^"]+)"')`
			`sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)`
add support for Sina 2012-09-01 23:03:05 +04:00
			`site_info = "Sina.com"`
			`download = sina_download`
			`download_playlist = playlist_not_supported('sina')`