diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 20e45616..151107a6 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -5,6 +5,8 @@ import binascii from ..common import * import random +import requests +import string import ctypes from json import loads @@ -80,7 +82,23 @@ def get_video_url_from_video_id(video_id): def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422 - html = get_html(url, faker=True) + sess = requests.session() + html = sess.get(url, headers=headers).text + conf = loads(match1(html, r"window\.config = (.+);")) + if not conf: + log.e("Get window.config from url failed, url: {}".format(url)) + return + verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \ + + '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31)) + try: + ok = get_content(verify_url) + except Exception as e: + ok = e.msg + if ok != 'OK': + log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok)) + return + html = sess.get(url, headers=headers).text + video_id = match1(html, r"\"vid\":\"([^\"]+)") title = match1(html, r"\"player__videoTitle\">.*?(.*)<\/h1><\/div>") if not video_id: