From 705d96c43c9c357ea256cefd6688d29b2315809b Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Wed, 20 May 2015 15:30:06 +0800 Subject: [PATCH 01/12] fix sohu --- src/you_get/extractors/sohu.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index c0e46545..132f3ec4 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -5,11 +5,13 @@ __all__ = ['sohu_download'] from ..common import * import json +import time -def real_url(host, prot, file, new): - url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new) - start, _, host, key = get_html(url).split('|')[:4] - return '%s%s?key=%s' % (start[:-1], new, key) + + +def real_url(vid,new): + url = 'http://data.vod.itc.cn/cdnList?new='+new+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000)) + return json.loads(get_html(url))['url'] def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): if re.match(r'http://share.vrs.sohu.com', url): @@ -37,8 +39,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for file, new in zip(data['clipsURL'], data['su']): - urls.append(real_url(host, prot, file, new)) + for new in data['su']: + urls.append(real_url(hqvid, new)) assert data['clipsURL'][0].endswith('.mp4') else: From ce4f116a31e166a2de43fa1edb925e8c903a952e Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Thu, 21 May 2015 12:54:20 +0800 Subject: [PATCH 02/12] forget to fix another code branch --- src/you_get/extractors/sohu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 132f3ec4..0c9f8f49 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -52,8 +52,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for file, new in zip(data['clipsURL'], data['su']): - urls.append(real_url(host, prot, file, new)) + for new in data['su']: + urls.append(real_url(hqvid, new)) assert data['clipsURL'][0].endswith('.mp4') print_info(site_info, title, 'mp4', size) From 50954908aa71c0758e1a4530027c3cd93f3c5fc4 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Thu, 21 May 2015 13:01:44 +0800 Subject: [PATCH 03/12] copy-paste typo --- src/you_get/extractors/sohu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 0c9f8f49..abe41b9e 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -50,11 +50,11 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] + print(data) size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new in data['su']: - urls.append(real_url(hqvid, new)) - assert data['clipsURL'][0].endswith('.mp4') + urls.append(real_url(vid, new)) print_info(site_info, title, 'mp4', size) if not info_only: From 0222cbb19febd68d9995768e2ab19673f0334553 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Thu, 21 May 2015 13:05:39 +0800 Subject: [PATCH 04/12] remove debug info --- src/you_get/extractors/sohu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index abe41b9e..fdc52f17 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -50,7 +50,6 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] - print(data) size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new in data['su']: From 4d0bb3d4fe41a24aad1dd3ddb9174c7d4aa648c1 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:19:38 +0800 Subject: [PATCH 05/12] sohu new api --- src/you_get/extractors/sohu.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index fdc52f17..c273f78f 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -6,11 +6,15 @@ from ..common import * import json import time +from random import random +from url.parse import urlparse +#http://115.25.217.132/?prot=9&prod=flash&pt=1& +#file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4 +#&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4 +#&key=3q6dEeDbCZwpf-kydU-7TH0YDP5UxFdU&vid=2406534&tvid=1663504&uid=13796019242829873083&sz=1583_434&md=WG4FExsQg2SW3C8BylUDISibt+AaBtYlyoHEkA==179&t=0.928698823787272 - - -def real_url(vid,new): - url = 'http://data.vod.itc.cn/cdnList?new='+new+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000)) +def real_url(host,vid,tvid,new,clipURL,ck): + url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random()) return json.loads(get_html(url))['url'] def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): @@ -37,11 +41,13 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] + tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new in data['su']: - urls.append(real_url(hqvid, new)) - assert data['clipsURL'][0].endswith('.mp4') + for new,clip,ck, in zip(data['su'],data['clipsURL']): + clipURL = urlparse(clip).path + urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) + # assert data['clipsURL'][0].endswith('.mp4') else: data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) @@ -50,10 +56,12 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] - size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) + tvid = data['tvid'] + size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new in data['su']: - urls.append(real_url(vid, new)) + for new,clip,ck, in zip(data['su'],data['clipsURL']): + clipURL = urlparse(clip).path + urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) print_info(site_info, title, 'mp4', size) if not info_only: From e26e149fb84ade180beb9accf0d4093e0c58ef93 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:24:20 +0800 Subject: [PATCH 06/12] oops --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index c273f78f..79770d28 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -7,7 +7,7 @@ from ..common import * import json import time from random import random -from url.parse import urlparse +from urllib.parse import urlparse #http://115.25.217.132/?prot=9&prod=flash&pt=1& #file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4 #&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4 From 2384e49fb25690e516f837c4fb770bfdbccf0199 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:28:08 +0800 Subject: [PATCH 07/12] obfused by arg name --- src/you_get/extractors/sohu.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 79770d28..cbbc4b62 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -28,20 +28,20 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac if re.match(r'http://tv.sohu.com/', url): if extractor_proxy: set_proxy(tuple(extractor_proxy.split(":"))) - data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: - hqvid = data['data'][qtyp] + hqvid = info['data'][qtyp] if hqvid != 0 and hqvid != vid : - data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) break if extractor_proxy: unset_proxy() - host = data['allot'] - prot = data['prot'] + host = info['allot'] + prot = info['prot'] + tvid = info['tvid'] urls = [] - data = data['data'] + data = info['data'] title = data['tvName'] - tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'],data['clipsURL']): @@ -50,11 +50,11 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac # assert data['clipsURL'][0].endswith('.mp4') else: - data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) - host = data['allot'] - prot = data['prot'] + info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) + host = info['allot'] + prot = info['prot'] urls = [] - data = data['data'] + data = info['data'] title = data['tvName'] tvid = data['tvid'] size = sum(data['clipsBytes']) From f629a20eb829405d8702768f2a033a1e7205c3a3 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:29:17 +0800 Subject: [PATCH 08/12] lack a arg --- src/you_get/extractors/sohu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index cbbc4b62..4d8db749 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -44,7 +44,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new,clip,ck, in zip(data['su'],data['clipsURL']): + for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) # assert data['clipsURL'][0].endswith('.mp4') @@ -59,7 +59,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new,clip,ck, in zip(data['su'],data['clipsURL']): + for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) From e7414d6f3808df0982ea490fd4a590dcf4a18f02 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:33:10 +0800 Subject: [PATCH 09/12] some mistake in my.tv.sohu.com --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 4d8db749..0bfea54d 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -53,10 +53,10 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) host = info['allot'] prot = info['prot'] + tvid = info['tvid'] urls = [] data = info['data'] title = data['tvName'] - tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): From 62e0ec0219c5e12054f8b9edf6b796380ed0cb0d Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:37:58 +0800 Subject: [PATCH 10/12] some mistake2 in my.tv.sohu.com --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 0bfea54d..fbf04f6c 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -57,7 +57,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = info['data'] title = data['tvName'] - size = sum(data['clipsBytes']) + size = sum(map(int,data['clipsBytes'])) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path From 3b97487505e9b26ba27a2afee446663ffb8059b8 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:38:56 +0800 Subject: [PATCH 11/12] some mistake3 in my.tv.sohu.com --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index fbf04f6c..96448e71 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -61,7 +61,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path - urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) + urls.append(real_url(host,vid,tvid,new,clipURL,ck)) print_info(site_info, title, 'mp4', size) if not info_only: From 412bfaf517663b326f44620eac0c31dce3f67c12 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 14:21:30 +0800 Subject: [PATCH 12/12] add some coments --- src/you_get/extractors/sohu.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 96448e71..68e9b8ad 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -8,10 +8,12 @@ import json import time from random import random from urllib.parse import urlparse -#http://115.25.217.132/?prot=9&prod=flash&pt=1& -#file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4 -#&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4 -#&key=3q6dEeDbCZwpf-kydU-7TH0YDP5UxFdU&vid=2406534&tvid=1663504&uid=13796019242829873083&sz=1583_434&md=WG4FExsQg2SW3C8BylUDISibt+AaBtYlyoHEkA==179&t=0.928698823787272 + +''' +Changelog: + 1. http://tv.sohu.com/upload/swf/20150604/Main.swf + new api +''' def real_url(host,vid,tvid,new,clipURL,ck): url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())