From cb375b6cfb210e3282337102642b97cb8244e591 Mon Sep 17 00:00:00 2001 From: pastebt Date: Mon, 2 Feb 2015 19:16:20 -0800 Subject: [PATCH 1/2] fix bilibili.com MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sample: python3 you-get -i http://www.bilibili.com/video/av663903/ Before the fix it returns: Video Site: bilibili.com Title: 【美版无间道】无间道风云【小李子-马呆萌】【犯罪】[2006] Type: Flash video (video/x-flv) Size: 0.17 MiB (180635 Bytes) This is an error.mp4 file After the fix it returns: Video Site: bilibili.com Title: 【美版无间道】无间道风云【小李子-马呆萌】【犯罪】[2006] Type: Flash video (video/x-flv) Size: 1989.14 MiB (2085762683 Bytes) --- src/you_get/extractors/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 6f8d7af3..190d62c3 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -19,7 +19,8 @@ client = { 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)' + #'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)' + 'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36" } def get_srt_xml(id): From 1763986ca4992ef45f63ad56f68176ac6ddf0ec5 Mon Sep 17 00:00:00 2001 From: pastebt Date: Mon, 2 Feb 2015 20:14:41 -0800 Subject: [PATCH 2/2] fix bug #353 also may fix bug #456 Now the -y proxy can be used for tv.sohu.com URLs such as http://tv.sohu.com/20140629/n401523369.shtml?txid=8254069965286abe9ee523a73c256ea7 --- src/you_get/extractors/sohu.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 6ee472e0..c0e46545 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -11,7 +11,7 @@ def real_url(host, prot, file, new): start, _, host, 
key = get_html(url).split('|')[:4] return '%s%s?key=%s' % (start[:-1], new, key) -def sohu_download(url, output_dir = '.', merge = True, info_only = False): +def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): if re.match(r'http://share.vrs.sohu.com', url): vid = r1('id=(\d+)', url) else: @@ -20,12 +20,16 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): assert vid if re.match(r'http://tv.sohu.com/', url): + if extractor_proxy: + set_proxy(tuple(extractor_proxy.split(":"))) data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: hqvid = data['data'][qtyp] if hqvid != 0 and hqvid != vid : data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) break + if extractor_proxy: + unset_proxy() host = data['allot'] prot = data['prot'] urls = []