From 10cc42f1fb9cbff01df4cc14b7be58fe90615f95 Mon Sep 17 00:00:00 2001 From: jason Date: Fri, 29 Mar 2019 22:01:08 +0800 Subject: [PATCH] fix toutiao errors --- src/you_get/extractors/toutiao.py | 45 ++++++++++++++++++------------- tests/test.py | 5 ++++ 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/you_get/extractors/toutiao.py b/src/you_get/extractors/toutiao.py index 03f7a13b..1c356055 100644 --- a/src/you_get/extractors/toutiao.py +++ b/src/you_get/extractors/toutiao.py @@ -1,27 +1,36 @@ #!/usr/bin/env python -import base64 - import binascii - -from ..common import * import random from json import loads +from urllib.parse import urlparse + +from ..common import * + +try: + from base64 import decodebytes +except ImportError: + from base64 import decodestring + + decodebytes = decodestring __all__ = ['toutiao_download', ] +def random_with_n_digits(n): + return random.randint(10 ** (n - 1), (10 ** n) - 1) + + def sign_video_url(vid): - # some code from http://codecloud.net/110854.html - r = str(random.random())[2:] + r = str(random_with_n_digits(16)) - def right_shift(val, n): - return val >> n if val >= 0 else (val + 0x100000000) >> n - - url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % vid - n = url.replace("http://i.snssdk.com", "")+ '?r=' + r - c = binascii.crc32(n.encode("ascii")) - s = right_shift(c, 0) - return url + '?r=%s&s=%s' % (r, s) + url = 'https://ib.365yg.com/video/urls/v/1/toutiao/mp4/{vid}'.format(vid=vid) + n = urlparse(url).path + '?r=' + r + b_n = bytes(n, encoding="utf-8") + s = binascii.crc32(b_n) + aid = 1364 + ts = int(time.time() * 1000) + return url + '?r={r}&s={s}&aid={aid}&vfrom=xgplayer&callback=axiosJsonpCallback1&_={ts}'.format(r=r, s=s, aid=aid, + ts=ts) class ToutiaoVideoInfo(object): @@ -43,12 +52,12 @@ def get_file_by_vid(video_id): vRet = [] url = sign_video_url(video_id) ret = get_content(url) - ret = loads(ret) + ret = loads(ret[20:-1]) vlist = ret.get('data').get('video_list') if len(vlist) > 0: vInfo = vlist.get(sorted(vlist.keys(), reverse=True)[0]) vUrl = vInfo.get('main_url') - vUrl = base64.decodestring(vUrl.encode('ascii')).decode('ascii') + vUrl = decodebytes(vUrl.encode('ascii')).decode('ascii') videoInfo = ToutiaoVideoInfo() videoInfo.bitrate = vInfo.get('bitrate') videoInfo.definition = vInfo.get('definition') @@ -63,8 +72,8 @@ def get_file_by_vid(video_id): def toutiao_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url, faker=True) - video_id = match1(html, r"videoid\s*:\s*'([^']+)',\n") - title = match1(html, r"title: '([^']+)'.replace") + video_id = match1(html, r".*?videoId: '(?P.*)'") + title = match1(html, '.*?(?P<title>.*?)') video_file_list = get_file_by_vid(video_id) # 调api获取视频源文件 type, ext, size = url_info(video_file_list[0].url, faker=True) print_info(site_info=site_info, title=title, type=type, size=size) diff --git a/tests/test.py b/tests/test.py index 047cdb0f..20b8de50 100644 --- a/tests/test.py +++ b/tests/test.py @@ -7,6 +7,7 @@ from you_get.extractors import ( magisto, youtube, bilibili, + toutiao, ) @@ -31,5 +32,9 @@ class YouGetTests(unittest.TestCase): info_only=True ) + def test_toutiao(self): + toutiao.download('https://www.365yg.com/i6640053613567675662/#mid=1611922564114440', info_only=True) + + if __name__ == '__main__': unittest.main()