you-get/src/you_get/extractors/sohu.py

#!/usr/bin/env python

__all__ = ['sohu_download']

from ..common import *

import json
import time
from random import random
from urllib.parse import urlparse
# Example of the clip request URL assembled by real_url() (this captured sample
# also carries tvid, sz and md parameters that real_url() does not send):
#http://115.25.217.132/?prot=9&prod=flash&pt=1&
#file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4
#&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4
#&key=3q6dEeDbCZwpf-kydU-7TH0YDP5UxFdU&vid=2406534&tvid=1663504&uid=13796019242829873083&sz=1583_434&md=WG4FExsQg2SW3C8BylUDISibt+AaBtYlyoHEkA==179&t=0.928698823787272

def real_url(host,vid,tvid,new,clipURL,ck):
    """Ask the allot host for the final download URL of a single clip.

    host    -- host name the request is sent to.
    vid     -- video id, sent as the ``vid`` query argument.
    tvid    -- accepted for call compatibility; not used in the request.
    new     -- value of the ``new`` query argument.
    clipURL -- value of the ``file`` query argument.
    ck      -- value of the ``key`` query argument.

    Returns the ``url`` field of the JSON document the host replies with.
    """
    # "uid" is the current time in milliseconds, "t" a random float in [0, 1).
    uid = str(int(time.time()*1000))
    nonce = str(random())
    pieces = [
        'http://', host,
        '/?prot=9&prod=flash&pt=1&file=', clipURL,
        '&new=', new,
        '&key=', ck,
        '&vid=', str(vid),
        '&uid=', uid,
        '&t=', nonce,
    ]
    reply = get_html(''.join(pieces))
    return json.loads(reply)['url']

def _sohu_clip_urls(info, vid):
    """Resolve the final download URL of every clip listed in *info*.

    info -- decoded JSON reply of the video-info request; the keys read are
            'allot', 'tvid' and 'data' (with 'su', 'clipsURL', 'clipsBytes'
            and 'ck' inside it).
    vid  -- video id forwarded to real_url() for each clip.

    Returns the list of URLs in clip order.
    """
    data = info['data']
    assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
    host = info['allot']
    tvid = info['tvid']
    urls = []
    for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
        # Only the path component of the clip URL is sent as the "file" argument.
        clipURL = urlparse(clip).path
        urls.append(real_url(host, vid, tvid, new, clipURL, ck))
    return urls

def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None):
    """Print info for, and optionally download, the Sohu video at *url*.

    url             -- page URL; share.vrs.sohu.com URLs carry the video id in
                       the URL itself, for any other URL the page is fetched
                       and searched for a ``vid`` assignment.
    output_dir      -- forwarded to download_urls().
    merge           -- forwarded to download_urls().
    info_only       -- when true, only print_info() is called; nothing is
                       downloaded.
    extractor_proxy -- optional "host:port" string; when given it is installed
                       with set_proxy() for the tv.sohu.com info requests only.

    Raises AssertionError when no video id can be found or when the clip lists
    in the API reply have different lengths.
    """
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = r1(r'id=(\d+)', url)
    else:
        html = get_html(url)
        vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
    assert vid

    # Id handed to real_url(); replaced below when another variant is selected.
    clip_vid = vid

    if re.match(r'http://tv.sohu.com/', url):
        if extractor_proxy:
            set_proxy(tuple(extractor_proxy.split(":")))
        try:
            info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
            # Try the variants in the listed order and switch to the first one
            # that is set and is not the id we already have.
            # NOTE(review): vid comes from r1() (presumably a str) while the
            # `!= 0` test suggests the API ids are ints; if so `hqvid != vid`
            # is always true -- confirm.
            for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]:
                hqvid = info['data'][qtyp]
                if hqvid != 0 and hqvid != vid :
                    info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
                    clip_vid = hqvid
                    break
        finally:
            # Always restore the proxy state, even if an info request failed.
            if extractor_proxy:
                unset_proxy()
    else:
        info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))

    data = info['data']
    title = data['tvName']
    # int() accepts both numeric strings and plain ints in clipsBytes.
    size = sum(map(int, data['clipsBytes']))
    urls = _sohu_clip_urls(info, clip_vid)

    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge)

# Human-readable site name; sohu_download() passes it to print_info().
site_info = "Sohu.com"
# Module-level alias for the single-video downloader (presumably the name the
# extractor dispatcher looks up -- confirm against the caller).
download = sohu_download
# Playlist entry point produced by playlist_not_supported('sohu'); that helper
# is not defined in this file (presumably star-imported from ..common).
download_playlist = playlist_not_supported('sohu')
add support for Sohu 2012-09-01 14:14:12 +04:00			`#!/usr/bin/env python`

			`__all__ = ['sohu_download']`

			`from ..common import *`

Sohu: fix #53 2013-01-03 19:04:33 +04:00			`import json`
fix sohu 2015-05-20 10:30:06 +03:00			`import time`
sohu new api 2015-06-05 07:19:38 +03:00			`from random import random`
oops 2015-06-05 07:24:20 +03:00			`from urllib.parse import urlparse`
sohu new api 2015-06-05 07:19:38 +03:00			`#http://115.25.217.132/?prot=9&prod=flash&pt=1&`
			`#file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4`
			`#&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4`
			`#&key=3q6dEeDbCZwpf-kydU-7TH0YDP5UxFdU&vid=2406534&tvid=1663504&uid=13796019242829873083&sz=1583_434&md=WG4FExsQg2SW3C8BylUDISibt+AaBtYlyoHEkA==179&t=0.928698823787272`
Sohu: fix #53 2013-01-03 19:04:33 +04:00
sohu new api 2015-06-05 07:19:38 +03:00			`def real_url(host,vid,tvid,new,clipURL,ck):`
			`url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())`
fix sohu 2015-05-20 10:30:06 +03:00			`return json.loads(get_html(url))['url']`
add support for Sohu 2012-09-01 14:14:12 +04:00
fix bug #353 also may fix bug #456 Now the -y proxy can be used for tv.sohu.com such as http://tv.sohu.com/20140629/n401523369.shtml?txid=8254069965286abe9ee523a73c256ea7 2015-02-03 07:14:41 +03:00			`def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None):`
Sohu: fix #264 2013-11-01 02:51:27 +04:00			`if re.match(r'http://share.vrs.sohu.com', url):`
			`vid = r1('id=(\d+)', url)`
			`else:`
			`html = get_html(url)`
			`vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)`
			`assert vid`
Sohu: Fix vid get error 2013-09-17 17:42:46 +04:00
Sohu: fix #264 2013-11-01 02:51:27 +04:00			`if re.match(r'http://tv.sohu.com/', url):`
fix bug #353 also may fix bug #456 Now the -y proxy can be used for tv.sohu.com such as http://tv.sohu.com/20140629/n401523369.shtml?txid=8254069965286abe9ee523a73c256ea7 2015-02-03 07:14:41 +03:00			`if extractor_proxy:`
			`set_proxy(tuple(extractor_proxy.split(":")))`
obfused by arg name 2015-06-05 07:28:08 +03:00			`info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]:`
obfused by arg name 2015-06-05 07:28:08 +03:00			`hqvid = info['data'][qtyp]`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`if hqvid != 0 and hqvid != vid :`
obfused by arg name 2015-06-05 07:28:08 +03:00			`info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`break`
fix bug #353 also may fix bug #456 Now the -y proxy can be used for tv.sohu.com such as http://tv.sohu.com/20140629/n401523369.shtml?txid=8254069965286abe9ee523a73c256ea7 2015-02-03 07:14:41 +03:00			`if extractor_proxy:`
			`unset_proxy()`
obfused by arg name 2015-06-05 07:28:08 +03:00			`host = info['allot']`
			`prot = info['prot']`
			`tvid = info['tvid']`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`urls = []`
obfused by arg name 2015-06-05 07:28:08 +03:00			`data = info['data']`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`title = data['tvName']`
			`size = sum(data['clipsBytes'])`
			`assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])`
lack a arg 2015-06-05 07:29:17 +03:00			`for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):`
sohu new api 2015-06-05 07:19:38 +03:00			`clipURL = urlparse(clip).path`
			`urls.append(real_url(host,hqvid,tvid,new,clipURL,ck))`
			`# assert data['clipsURL'][0].endswith('.mp4')`
Sohu: Fix vid get error 2013-09-17 17:42:46 +04:00
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`else:`
obfused by arg name 2015-06-05 07:28:08 +03:00			`info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))`
			`host = info['allot']`
			`prot = info['prot']`
some mistake in my.tv.sohu.com 2015-06-05 07:33:10 +03:00			`tvid = info['tvid']`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`urls = []`
obfused by arg name 2015-06-05 07:28:08 +03:00			`data = info['data']`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`title = data['tvName']`
some mistake2 in my.tv.sohu.com 2015-06-05 07:37:58 +03:00			`size = sum(map(int,data['clipsBytes']))`
Sohu: fix #53 2013-01-03 19:04:33 +04:00			`assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])`
lack a arg 2015-06-05 07:29:17 +03:00			`for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):`
sohu new api 2015-06-05 07:19:38 +03:00			`clipURL = urlparse(clip).path`
			`urls.append(real_url(host,hqvid,tvid,new,clipURL,ck))`
Sohu: Fix vid get error 2013-09-17 17:42:46 +04:00
add support for Sohu 2012-09-01 14:14:12 +04:00			`print_info(site_info, title, 'mp4', size)`
			`if not info_only:`
			`download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge)`

			`site_info = "Sohu.com"`
			`download = sohu_download`
			`download_playlist = playlist_not_supported('sohu')`