From b9d1987d23b937629cd4bba9b97ad8f018d57798 Mon Sep 17 00:00:00 2001 From: perror <15058342792@163.com> Date: Tue, 5 Dec 2017 19:34:05 +0800 Subject: [PATCH] New site support: https://www.ixigua.com/ --- src/you_get/extractors/ixigua.py | 102 +++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 src/you_get/extractors/ixigua.py diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py new file mode 100644 index 00000000..b37c5e6d --- /dev/null +++ b/src/you_get/extractors/ixigua.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +__all__ = ['ixigua_download', ''] +import base64 +import random +import binascii +from ..common import * + + +# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/ +# +# Returns a list of tuples (audio_id, title, url) for each episode +# (audio) in the radio playlist. url is the direct link to the audio +# file. +def get_video_id(text): + re_id = r"videoId: '(.*?)'" + return re.findall(re_id, text)[0] + + +def get_r(): + return str(random.random())[2:] + + +def right_shift(val, n): + return val >> n if val >= 0 else (val + 0x100000000) >> n + + +def get_s(text): + """get video info + """ + id = get_video_id(text) + p = get_r() + url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % id + n = parse.urlparse(url).path + '?r=%s' % p + c = binascii.crc32(n.encode('utf-8')) + s = right_shift(c, 0) + title = ''.join(re.findall(r"title: '(.*?)',", text)) + return url + '?r=%s&s=%s' % (p, s), title + + +def get_moment(url, user_id, base_url, video_list): + """Recursively obtaining a video list + """ + video_list_data = json.loads(get_content(url)) + if not video_list_data['next']['max_behot_time']: + return video_list + [video_list.append(i["display_url"]) for i in video_list_data["data"]] + max_behot_time = video_list_data['next']['max_behot_time'] + _param = { + 'user_id': user_id, + 'base_url': base_url, + 'video_list': video_list, + 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), + } + return get_moment(**_param) + + +def ixigua_download(url, output_dir='.', info_only=False, **kwargs): + """ Download a single video + Sample URL: https://www.ixigua.com/a6487187567887254029/#mid=59051127876 + """ + try: + video_info_url, title = get_s(get_content(url)) + video_info = json.loads(get_content(video_info_url)) + except Exception: + raise NotImplementedError(url) + try: + video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"]).decode() + except Exception: + import traceback + traceback.print_exc() + raise NotImplementedError(url) + filetype, ext, size = url_info(video_url) + print_info(site_info, title, filetype, size) + if not info_only: + download_urls([video_url], title, ext, size, output_dir=output_dir) + + +def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): + """Download all video from the user's video list + Sample URL: https://www.ixigua.com/c/user/71141690831/ + """ + if 'user' not in url: + raise NotImplementedError(url) + user_id = url.split('/')[-2] + max_behot_time = 0 + if not user_id: + raise NotImplementedError(url) + base_url = "https://www.ixigua.com/c/user/article/?user_id={user_id}" \ + "&max_behot_time={max_behot_time}&max_repin_time=0&count=20&page_type=0" + _param = { + 'user_id': user_id, + 'base_url': base_url, + 'video_list': [], + 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), + } + for i in get_moment(**_param): + ixigua_download(i, output_dir, info_only, **kwargs) + + +site_info = "ixigua.com" +download = ixigua_download +download_playlist = ixigua_download_playlist