diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 607bf44c..a1afc126 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -15,11 +15,13 @@ Changelog: new api ''' + def real_url(fileName, key, ch): url = "https://data.vod.itc.cn/ip?new=" + fileName + "&num=1&key=" + key + "&ch=" + ch + "&pt=1&pg=2&prod=h5n" return json.loads(get_html(url))['servers'][0]['url'] -def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None, **kwargs): + +def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs): if re.match(r'http://share.vrs.sohu.com', url): vid = r1('id=(\d+)', url) else: @@ -27,16 +29,16 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) assert vid - if re.match(r'http[s]://tv.sohu.com/', url): - if extractor_proxy: - set_proxy(tuple(extractor_proxy.split(":"))) - info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) - for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: + if extractor_proxy: + set_proxy(tuple(extractor_proxy.split(":"))) + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) + if info and info.get("data", ""): + for qtyp in ["oriVid", "superVid", "highVid", "norVid", "relativeId"]: if 'data' in info: hqvid = info['data'][qtyp] else: hqvid = info[qtyp] - if hqvid != 0 and hqvid != vid : + if hqvid != 0 and hqvid != vid: info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) if not 'allot' in info: continue @@ -63,14 +65,15 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = info['data'] title = data['tvName'] - size = sum(map(int,data['clipsBytes'])) + size = sum(map(int, data['clipsBytes'])) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for fileName, key in zip(data['su'], data['ck']): urls.append(real_url(fileName, key, data['ch'])) print_info(site_info, title, 'mp4', size) if not info_only: - download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge) + download_urls(urls, title, 'mp4', size, output_dir, refer=url, merge=merge) + site_info = "Sohu.com" download = sohu_download diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 0629035b..eea31503 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -216,6 +216,7 @@ class YouTube(VideoExtractor): self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] # Workaround: get_video_info returns bad s. Why? stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') + #stream_list = ytplayer_config['args']['adaptive_fmts'].split(',') except: stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',') if re.search('([^"]*/base\.js)"', video_page): @@ -306,7 +307,8 @@ class YouTube(VideoExtractor): 'url': metadata['url'][0], 'sig': metadata['sig'][0] if 'sig' in metadata else None, 's': metadata['s'][0] if 's' in metadata else None, - 'quality': metadata['quality'][0], + 'quality': metadata['quality'][0] if 'quality' in metadata else None, + #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None, 'type': metadata['type'][0], 'mime': metadata['type'][0].split(';')[0], 'container': mime_to_container(metadata['type'][0].split(';')[0]), @@ -433,13 +435,13 @@ class YouTube(VideoExtractor): dash_mp4_a_url = stream['url'] if 's' in stream: sig = self.__class__.decipher(self.js, stream['s']) - dash_mp4_a_url += '&signature={}'.format(sig) + dash_mp4_a_url += '&sig={}'.format(sig) dash_mp4_a_size = stream['clen'] elif stream['type'].startswith('audio/webm'): dash_webm_a_url = stream['url'] if 's' in stream: sig = self.__class__.decipher(self.js, stream['s']) - dash_webm_a_url += '&signature={}'.format(sig) + dash_webm_a_url += '&sig={}'.format(sig) dash_webm_a_size = stream['clen'] for stream in streams: # video if 'size' in stream: @@ -448,7 +450,7 @@ class YouTube(VideoExtractor): dash_url = stream['url'] if 's' in stream: sig = self.__class__.decipher(self.js, stream['s']) - dash_url += '&signature={}'.format(sig) + dash_url += '&sig={}'.format(sig) dash_size = stream['clen'] itag = stream['itag'] dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) @@ -467,7 +469,7 @@ class YouTube(VideoExtractor): dash_url = stream['url'] if 's' in stream: sig = self.__class__.decipher(self.js, stream['s']) - dash_url += '&signature={}'.format(sig) + dash_url += '&sig={}'.format(sig) dash_size = stream['clen'] itag = stream['itag'] audio_url = None @@ -510,13 +512,13 @@ class YouTube(VideoExtractor): src = self.streams[stream_id]['url'] if self.streams[stream_id]['sig'] is not None: sig = self.streams[stream_id]['sig'] - src += '&signature={}'.format(sig) + src += '&sig={}'.format(sig) elif self.streams[stream_id]['s'] is not None: if not hasattr(self, 'js'): self.js = get_content(self.html5player) s = self.streams[stream_id]['s'] sig = self.__class__.decipher(self.js, s) - src += '&signature={}'.format(sig) + src += '&sig={}'.format(sig) self.streams[stream_id]['src'] = [src] self.streams[stream_id]['size'] = urls_size(self.streams[stream_id]['src']) diff --git a/src/you_get/version.py b/src/you_get/version.py index 654732ce..a31efa48 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1302' +__version__ = '0.4.1314'