#!/usr/bin/env python
# Extractor for Toutiao videos (src/you_get/extractors/toutiao.py).
#
# NOTE: this extractor is registered in the SITES mapping of
# src/you_get/common.py with the entry:
#     '365yg': 'toutiao',
import base64
import ctypes
import json
import random
from json import loads

from ..common import *

__all__ = ['toutiao_download', ]

# The constant XOR-ed into table entries by gen_table(); as an unsigned
# 32-bit value it is 0xEDB88320.
_TABLE_POLY = -306674912


def int_overflow(val):
    """Wrap *val* into the signed 32-bit integer range.

    Values already inside [-2**31, 2**31 - 1] are returned unchanged;
    anything else is reduced modulo 2**32 back into that range.
    """
    maxint = 2147483647
    if not -maxint - 1 <= val <= maxint:
        val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
    return val


def unsigned_right_shift(n, i):
    """Shift *n* right by *i* bits, treating a negative *n* as unsigned 32-bit.

    A negative *n* is first reinterpreted as an unsigned 32-bit value.
    For a negative *i* the value is shifted left by ``abs(i)`` instead,
    wrapped to 32 bits and negated. The result is always passed through
    int_overflow().
    """
    if n < 0:
        n = ctypes.c_uint32(n).value
    if i < 0:
        return -int_overflow(n << abs(i))
    return int_overflow(n >> i)


# Backward-compatible alias for the original (misspelled) public name.
unsigned_right_shitf = unsigned_right_shift


def gen_table():
    """Build the 256-entry lookup table consumed by sign_url().

    Each entry starts as its own index and goes through eight rounds of
    "shift right by one; XOR with the polynomial constant if the low bit
    was set". Entries are stored as signed 32-bit integers.
    """
    entries = []
    for index in range(256):
        entry = index
        for _ in range(8):
            shifted = unsigned_right_shift(entry, 1)
            entry = (_TABLE_POLY ^ shifted) if 1 & entry else shifted
        entries.append(entry)
    return entries


table = gen_table()


def sign_url(r, url):
    """Compute the table-driven checksum of *url* as a signed 32-bit int.

    :param r: 256-entry lookup table (see gen_table()).
    :param url: string to sign; characters with a code point >= 128
        are skipped and do not contribute to the checksum.
    :return: the checksum, possibly negative (signed representation).
    """
    checksum = -1
    for char in url:
        code = ord(char)
        if code < 128:
            checksum = (unsigned_right_shift(checksum, 8)
                        ^ r[255 & (checksum ^ code)])
    return checksum ^ -1


def sign_video_url(vid):
    """Return the signed i.snssdk.com API URL for the video id *vid*.

    The path gets a random ``r`` query parameter and the checksum of
    path + query (converted to unsigned 32-bit) is appended as ``s``.
    """
    path = "/video/urls/v/1/toutiao/mp4/" + vid + "?r=" + \
        str(random.randint(10000000000000000, 999999999999999999))
    signature = sign_url(table, path)
    if signature < 0:
        # Convert the signed checksum to its unsigned 32-bit value.
        signature += 4294967296
    return "http:" + "//" + "i.snssdk.com" + path + "&s=" + str(signature)


class ToutiaoVideoInfo(object):
    """Plain container for the attributes of one video stream."""

    def __init__(self):
        self.bitrate = None
        self.definition = None
        self.size = None
        self.height = None
        self.width = None
        self.type = None
        self.url = None

    def __str__(self):
        return json.dumps(self.__dict__)


def get_file_by_vid(video_id):
    """Query the video API and describe the preferred stream for *video_id*.

    :return: a list holding a single ToutiaoVideoInfo for the entry of
        ``data.video_list`` with the greatest key, or an empty list when
        the response contains no video list.
    """
    response = loads(get_content(sign_video_url(video_id)))
    # Tolerate a missing/null 'data' or 'video_list' instead of failing
    # with an AttributeError/TypeError on None.
    video_list = (response.get('data') or {}).get('video_list') or {}
    if not video_list:
        return []

    # Same pick as sorting the keys in descending order and taking the first.
    stream = video_list[max(video_list)]

    info = ToutiaoVideoInfo()
    info.bitrate = stream.get('bitrate')
    info.definition = stream.get('definition')
    info.size = stream.get('size')
    info.height = stream.get('vheight')
    info.width = stream.get('vwidth')
    info.type = stream.get('vtype')
    # 'main_url' is base64-encoded. base64.decodestring() was removed in
    # Python 3.9, so use b64decode() which is available everywhere.
    info.url = base64.b64decode(
        stream.get('main_url').encode('ascii')).decode('ascii')
    return [info]


def toutiao_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Print information about, and optionally download, a Toutiao video.

    :param url: address of the page embedding the video.
    :param output_dir: directory handed to download_urls().
    :param merge: forwarded to download_urls().
    :param info_only: when true, only print the stream information.
    :raises ValueError: if no video id can be found in the page or the
        API reports no playable stream.
    """
    html = get_html(url, faker=True)
    video_id = match1(html, r"videoid\s*:\s*'([^']+)',\n")
    if not video_id:
        raise ValueError('toutiao: cannot find a video id in ' + url)
    title = match1(html, r"title: '([^']+)'.replace")

    # Call the API to get the video source files.
    video_file_list = get_file_by_vid(video_id)
    if not video_file_list:
        raise ValueError('toutiao: no video stream found for id ' + video_id)

    video_url = video_file_list[0].url
    # Named mime_type rather than `type` to avoid shadowing the builtin.
    mime_type, ext, size = url_info(video_url, faker=True)
    log.d(video_url)
    print_info(site_info=site_info, title=title, type=mime_type, size=size)
    if not info_only:
        download_urls(
            [video_url],
            title,
            ext,
            size,
            output_dir,
            merge=merge,
            faker=True)


site_info = "Toutiao.com"
download = toutiao_download
download_playlist = playlist_not_supported("toutiao")