New implementation for SohuTV

in order to support sohu_download_by_vid

Breaks: videos hosted on http://my.tv.sohu.com/ are no longer handled.

Signed-off-by: Zhang Ning <zhangn1985@gmail.com>
This commit is contained in:
Zhang Ning 2015-09-04 17:44:01 +08:00
parent b277d8d5d4
commit c9590bd8d5

View File

@ -1,8 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__all__ = ['sohu_download']
from ..common import * from ..common import *
from ..extractor import VideoExtractor
import json import json
import time import time
@ -15,60 +14,85 @@ Changelog:
new api new api
''' '''
def real_url(host, vid, tvid, new, clipURL, ck):
    """Fetch the final download URL for a single clip.

    Assembles the query URL for ``host`` from the clip path, the ``new``
    and ``ck`` tokens and the video id, adds a millisecond timestamp and a
    random value, requests it with ``get_html`` and returns the ``'url'``
    field of the JSON reply.

    ``tvid`` is accepted for the caller's convenience but is not used.
    """
    pieces = [
        'http://', host,
        '/?prot=9&prod=flash&pt=1&file=', clipURL,
        '&new=', new,
        '&key=', ck,
        '&vid=', str(vid),
        # uid is the current time in milliseconds.
        '&uid=', str(int(time.time()*1000)),
        '&t=', str(random()),
    ]
    query_url = ''.join(pieces)
    reply = json.loads(get_html(query_url))
    return reply['url']
def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): class Sohu(VideoExtractor):
if re.match(r'http://share.vrs.sohu.com', url): name = "搜狐 (Sohu)"
vid = r1('id=(\d+)', url)
else:
html = get_html(url)
vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
assert vid
if re.match(r'http://tv.sohu.com/', url): stream_types = [
if extractor_proxy: {'id': 'oriVid', 'container': 'mp4', 'video_profile': '原画'},
set_proxy(tuple(extractor_proxy.split(":"))) {'id': 'superVid', 'container': 'mp4', 'video_profile': '超清'},
info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) {'id': 'highVid', 'container': 'mp4', 'video_profile': '高清'},
for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: {'id': 'norVid', 'container': 'mp4', 'video_profile': '标清'},
hqvid = info['data'][qtyp] {'id': 'relativeId', 'container': 'mp4', 'video_profile': '当前'},
if hqvid != 0 and hqvid != vid : ]
info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
break realurls = { 'oriVid': [], 'superVid': [], 'highVid': [], 'norVid': [], 'relativeId': []}
if extractor_proxy: vids = {}
unset_proxy()
host = info['allot'] def real_url(host, vid, tvid, new, clipURL, ck):
prot = info['prot'] return 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())
tvid = info['tvid']
urls = []
def get_vid_from_url(url):
    """Extract the numeric video id from an ``id=<digits>`` URL parameter.

    Delegates to ``match1``; presumably that returns the first capture
    group, or a falsy value when the pattern is absent (the caller falls
    back to another lookup with ``or``) -- confirm against ``match1``.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    return match1(url, r'id=(\d+)')
def get_vid_from_content(content):
    """Extract the numeric video id from a ``/<digits>/v.swf`` reference.

    ``content`` is the text to search (the caller passes the decoded page
    HTML). Delegates to ``match1``; presumably that returns the first
    capture group, or a falsy value on no match -- confirm against
    ``match1``.
    """
    # Raw string: '\/' and '\.' in a plain literal are invalid escapes.
    return match1(content, r'\/([0-9]+)\/v\.swf')
def prepare(self, **kwargs):
assert self.url or self.vid
if self.url and not self.vid:
self.vid = self.__class__.get_vid_from_url(self.url) or \
self.__class__.get_vid_from_content(str(get_decoded_html(self.url)))
info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % self.vid))
data = info['data'] data = info['data']
title = data['tvName'] self.title = data['tvName']
size = sum(data['clipsBytes']) for stream in self.stream_types:
assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) lvid = data[stream['id']]
for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): if lvid != 0 and lvid != self.vid :
clipURL = urlparse(clip).path info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % lvid))
urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) data = info['data']
# assert data['clipsURL'][0].endswith('.mp4') host = info['allot']
prot = info['prot']
tvid = info['tvid']
size = sum(data['clipsBytes'])
assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']):
clipURL = urlparse(clip).path
self.realurls[stream['id']].append(self.__class__.real_url(host, lvid, tvid, new, clipURL, ck))
self.streams[stream['id']] = {'container': 'mp4', 'video_profile': stream['video_profile'], 'size' : size}
self.vids[stream['id']] = lvid
def extract(self, **kwargs):
if 'stream_id' in kwargs and kwargs['stream_id']:
# Extract the stream
stream_id = kwargs['stream_id']
if stream_id not in self.streams:
log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2)
else:
# Extract stream with the best quality
stream_id = self.streams_sorted[0]['id']
new_stream_id = stream_id
if self.vids[new_stream_id] == 0:
for stream in self.stream_types:
if self.vids[stream['id']] != 0:
new_stream_id = stream['id']
break
else:
info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
host = info['allot']
prot = info['prot']
tvid = info['tvid']
urls = [] urls = []
data = info['data'] for url in self.realurls[new_stream_id]:
title = data['tvName'] info = json.loads(get_html(url))
size = sum(map(int,data['clipsBytes'])) urls.append(info['url'])
assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) self.streams[stream_id]['src'] = urls
for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):
clipURL = urlparse(clip).path
urls.append(real_url(host,vid,tvid,new,clipURL,ck))
print_info(site_info, title, 'mp4', size)
if not info_only:
download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge)
site_info = "Sohu.com" site = Sohu()
download = sohu_download download = site.download_by_url
sohu_download_by_vid = site.download_by_vid
download_playlist = playlist_not_supported('sohu') download_playlist = playlist_not_supported('sohu')