From 0b50fdfab430cff3b1e02c17def260ae0a5b47a3 Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Wed, 28 Feb 2018 16:45:48 +0800 Subject: [PATCH] [ixigua] fix URL request error and video download error and video encryption parameters acquisition --- src/you_get/extractors/ixigua.py | 40 ++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index aaed195d..0c668e82 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -5,30 +5,35 @@ import random import binascii from ..common import * -def get_video_id(text): - re_id = r"videoId: '(.*?)'" - return re.findall(re_id, text)[0] +headers = { + 'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36' + ' (KHTML, like Gecko) Chrome/61.0.3163.100 Mobile Safari/537.36' +} + def get_r(): return str(random.random())[2:] + def right_shift(val, n): return val >> n if val >= 0 else (val + 0x100000000) >> n + def get_s(text): """get video info""" - id = get_video_id(text) + js_data = json.loads(text) + id = js_data['data']['video_id'] p = get_r() url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % id n = parse.urlparse(url).path + '?r=%s' % p c = binascii.crc32(n.encode('utf-8')) s = right_shift(c, 0) - title = ''.join(re.findall(r"title: '(.*?)',", text)) - return url + '?r=%s&s=%s' % (p, s), title + return url + '?r=%s&s=%s' % (p, s), js_data['data']['title'] + def get_moment(url, user_id, base_url, video_list): """Recursively obtaining a video list""" - video_list_data = json.loads(get_content(url)) + video_list_data = json.loads(get_content(url, headers=headers)) if not video_list_data['next']['max_behot_time']: return video_list [video_list.append(i["display_url"]) for i in video_list_data["data"]] @@ -41,23 +46,33 @@ def get_moment(url, user_id, base_url, video_list): } return get_moment(**_param) + def ixigua_download(url, output_dir='.', info_only=False, **kwargs): """ Download a single video Sample URL: https://www.ixigua.com/a6487187567887254029/#mid=59051127876 """ try: - video_info_url, title = get_s(get_content(url)) - video_info = json.loads(get_content(video_info_url)) + video_page_id = re.findall('(\d+)', [i for i in url.split('/') if i][3])[0] if 'toutiao.com' in url \ + else re.findall('(\d+)', [i for i in url.split('/') if i][2])[0] + + video_start_info_url = r'https://m.ixigua.com/i{}/info/'.format(video_page_id) + video_info_url, title = get_s(get_content(video_start_info_url, headers=headers or kwargs.get('headers', {}))) + video_info = json.loads(get_content(video_info_url, headers=headers or kwargs.get('headers', {}))) except Exception: raise NotImplementedError(url) try: video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"]).decode() except Exception: raise NotImplementedError(url) - filetype, ext, size = url_info(video_url) + filetype, ext, size = url_info(video_url, headers=headers or kwargs.get('headers', {})) print_info(site_info, title, filetype, size) if not info_only: - download_urls([video_url], title, ext, size, output_dir=output_dir) + _param = { + 'output_dir': output_dir, + 'headers': headers or kwargs.get('headers', {}) + } + download_urls([video_url], title, ext, size, **_param) + def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): """Download all video from the user's video list @@ -80,6 +95,7 @@ def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): for i in get_moment(**_param): ixigua_download(i, output_dir, info_only, **kwargs) + site_info = "ixigua.com" download = ixigua_download -download_playlist = ixigua_download_playlist \ No newline at end of file +download_playlist = ixigua_download_playlist