From 8663ef14bdde7cda3f0bc77ed112c2dd545d5270 Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Fri, 4 Sep 2015 18:45:35 +0800 Subject: [PATCH] support my.tv.sohu.com try tv.sohu.com first then my.tv.sohu.com Signed-off-by: Zhang Ning --- src/you_get/extractors/sohu.py | 61 ++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 39be6026..df894e02 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -23,47 +23,58 @@ class Sohu(VideoExtractor): {'id': 'superVid', 'container': 'mp4', 'video_profile': '超清'}, {'id': 'highVid', 'container': 'mp4', 'video_profile': '高清'}, {'id': 'norVid', 'container': 'mp4', 'video_profile': '标清'}, - {'id': 'relativeId', 'container': 'mp4', 'video_profile': '当前'}, ] realurls = { 'oriVid': [], 'superVid': [], 'highVid': [], 'norVid': [], 'relativeId': []} - vids = {} + vids = { 'oriVid': 0, 'superVid': 0, 'highVid': 0, 'norVid': 0, 'relativeId': 0} def real_url(host, vid, tvid, new, clipURL, ck): return 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random()) - def get_vid_from_url(url): - return match1(url, 'id=(\d+)') - def get_vid_from_content(content): - return match1(content, '\/([0-9]+)\/v\.swf') + return match1(content, '\/([0-9]+)\/v\.swf') or \ + match1(content, '\&id=(\d+)') + + def parser_info(self, info, stream, lvid): + host = info['allot'] + prot = info['prot'] + tvid = info['tvid'] + data = info['data'] + size = sum(map(int,data['clipsBytes'])) + assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) + for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']): + clipURL = urlparse(clip).path + self.realurls[stream['id']].append(self.__class__.real_url(host, lvid, tvid, new, clipURL, ck)) + self.streams[stream['id']] = {'container': 'mp4', 'video_profile': stream['video_profile'], 'size' : size} + self.vids[stream['id']] = lvid + def prepare(self, **kwargs): assert self.url or self.vid if self.url and not self.vid: - self.vid = self.__class__.get_vid_from_url(self.url) or \ - self.__class__.get_vid_from_content(str(get_decoded_html(self.url))) + self.vid = self.__class__.get_vid_from_content(str(get_decoded_html(self.url))) info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % self.vid)) - data = info['data'] - self.title = data['tvName'] - for stream in self.stream_types: - lvid = data[stream['id']] - if lvid != 0 and lvid != self.vid : - info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % lvid)) - data = info['data'] - host = info['allot'] - prot = info['prot'] - tvid = info['tvid'] - size = sum(data['clipsBytes']) - assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']): - clipURL = urlparse(clip).path - self.realurls[stream['id']].append(self.__class__.real_url(host, lvid, tvid, new, clipURL, ck)) - self.streams[stream['id']] = {'container': 'mp4', 'video_profile': stream['video_profile'], 'size' : size} - self.vids[stream['id']] = lvid + if info['status'] == 1: + data = info['data'] + self.title = data['tvName'] + for stream in self.stream_types: + lvid = data[stream['id']] + if lvid != 0 and lvid != self.vid : + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % lvid)) + self.parser_info(info, stream, lvid) + return + info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % self.vid)) + if info['status'] == 1: + data = info['data'] + self.title = data['tvName'] + for stream in self.stream_types: + lvid = data[stream['id']] + info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % lvid)) + self.parser_info(info, stream, lvid) + return def extract(self, **kwargs): if 'stream_id' in kwargs and kwargs['stream_id']: