Fix Sohu download "no host" error (#2268)

Signed-off-by: Ein Verne <einverne@gmail.com>
This commit is contained in:
Ein Verne 2017-08-14 17:38:40 +08:00
parent 699ed7e501
commit 0deb78b5f7

View File

@ -2,26 +2,37 @@
__all__ = ['sohu_download'] __all__ = ['sohu_download']
from ..common import *
import json import json
import time import time
from random import random from random import random
from urllib.parse import urlparse from urllib.parse import urlparse
from ..common import *
''' '''
Changelog: Changelog:
1. http://tv.sohu.com/upload/swf/20150604/Main.swf 1. http://tv.sohu.com/upload/swf/20150604/Main.swf
new api new api
''' '''
def real_url(host, vid, tvid, new, clipURL, ck):
    """Resolve the final media URL for a single Sohu video clip.

    When an API *host* is known, query Sohu's flash dispatch endpoint and
    return the URL from its JSON reply.  When *host* is empty/None (the
    "no host" case), fall back to the clip URL itself, prefixing the
    ``http://`` scheme if it is missing.
    """
    if not host:
        # No dispatch host available: use the clip URL directly.
        if clipURL.startswith('http://'):
            return clipURL
        return 'http://' + clipURL
    # Only the path component of the clip URL goes into the API query.
    clip_path = urlparse(clipURL).path
    query = ('http://' + host + '/?prot=9&prod=flash&pt=1&file=' + clip_path
             + '&new=' + new + '&key=' + ck + '&vid=' + str(vid)
             + '&uid=' + str(int(time.time() * 1000))
             + '&t=' + str(random()) + '&rb=1')
    # The endpoint answers with JSON whose 'url' field is the real media URL.
    return json.loads(get_html(query))['url']
def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs):
if re.match(r'http://share\.vrs\.sohu\.com', url):
vid = r1('id=(\d+)', url) vid = r1('id=(\d+)', url)
elif re.match(r'https?://my\.tv\.sohu\.com/us/\d+/(\d+)\.shtml', url):
vid = r1(r'http://my\.tv\.sohu\.com/us/\d+/(\d+)\.shtml', url)
else: else:
html = get_html(url) html = get_html(url)
vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
@ -31,12 +42,12 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac
if extractor_proxy: if extractor_proxy:
set_proxy(tuple(extractor_proxy.split(":"))) set_proxy(tuple(extractor_proxy.split(":")))
info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: for qtyp in ["oriVid", "superVid", "highVid", "norVid", "relativeId"]:
if 'data' in info: if 'data' in info:
hqvid = info['data'][qtyp] hqvid = info['data'][qtyp]
else: else:
hqvid = info[qtyp] hqvid = info[qtyp]
if hqvid != 0 and hqvid != vid : if hqvid != 0 and hqvid != vid:
info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
if not 'allot' in info: if not 'allot' in info:
continue continue
@ -51,10 +62,9 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac
title = data['tvName'] title = data['tvName']
size = sum(data['clipsBytes']) size = sum(data['clipsBytes'])
assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']):
clipURL = urlparse(clip).path urls.append(real_url(host, hqvid, tvid, new, clip, ck))
urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) # assert data['clipsURL'][0].endswith('.mp4')
# assert data['clipsURL'][0].endswith('.mp4')
else: else:
info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
@ -64,15 +74,15 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac
urls = [] urls = []
data = info['data'] data = info['data']
title = data['tvName'] title = data['tvName']
size = sum(map(int,data['clipsBytes'])) size = sum(map(int, data['clipsBytes']))
assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']):
clipURL = urlparse(clip).path urls.append(real_url(host, vid, tvid, new, clip, ck))
urls.append(real_url(host,vid,tvid,new,clipURL,ck))
print_info(site_info, title, 'mp4', size) print_info(site_info, title, 'mp4', size)
if not info_only: if not info_only:
download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge) download_urls(urls, title, 'mp4', size, output_dir, refer=url, merge=merge)
# Extractor registration: human-readable site name and the entry point
# used by the you-get dispatcher.
site_info = "Sohu.com"
download = sohu_download