From 24578efa1e34c24c7828d82cc27c70b478e6740a Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Fri, 11 May 2018 12:01:31 +0800 Subject: [PATCH] repair douyutv 403 error --- src/you_get/extractors/douyutv.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index b7b15e74..72a41a0a 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -9,6 +9,10 @@ import hashlib import time import re +headers = { + 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' + } + def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **kwargs): ep = 'http://vmobile.douyu.com/video/getInfo?vid=' patt = r'show/([0-9A-Za-z]+)' @@ -19,7 +23,7 @@ def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **k log.wtf('Unknown url pattern') vid = hit.group(1) - page = get_content(url) + page = get_content(url, headers=headers) hit = re.search(title_patt, page) if hit is None: title = vid @@ -35,21 +39,18 @@ def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **k urls = general_m3u8_extractor(m3u8_url) download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) -def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + +def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if 'v.douyu.com/show/' in url: douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) return - headers = { - 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' - } - - url = re.sub(r'[w.]*douyu.com','m.douyu.com',url) + url = re.sub(r'[w.]*douyu.com', 'm.douyu.com', url) html = get_content(url, headers) room_id_patt = r'room_id\s*:\s*(\d+),' room_id = match1(html, room_id_patt) if room_id == "0": - room_id = url[url.rfind('/')+1:] + room_id = url[url.rfind('/') + 1:] api_url = "http://www.douyutv.com/api/v1/" args = "room/%s?aid=wp&client_sys=wp&time=%d" % (room_id, int(time.time())) @@ -60,7 +61,7 @@ def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **k content = get_content(json_request_url, headers) json_content = json.loads(content) data = json_content['data'] - server_status = json_content.get('error',0) + server_status = json_content.get('error', 0) if server_status is not 0: raise ValueError("Server returned error:%s" % server_status) @@ -73,7 +74,8 @@ def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **k print_info(site_info, title, 'flv', float('inf')) if not info_only: - download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir = output_dir, merge = merge) + download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) + site_info = "douyu.com" download = douyutv_download