Merge branch 'develop' of https://github.com/albertli2333/you-get into albertli2333-develop

This commit is contained in:
Mort Yao 2022-04-19 17:59:02 +02:00
commit 7f94fd6e53
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251

View File

@ -18,121 +18,95 @@ headers = {
} }
def int_overflow(val):
    """Wrap *val* into the signed 32-bit integer range, as JS bitwise ops do.

    Values already inside [-2**31, 2**31 - 1] pass through unchanged;
    anything else wraps around modulo 2**32.
    """
    bound = 1 << 31  # 2147483648
    if -bound <= val <= bound - 1:
        return val
    return (val + bound) % (bound << 1) - bound
def unsigned_right_shitf(n, i):
    """Emulate JavaScript's ``>>>`` (zero-fill right shift) on 32-bit values.

    A negative ``n`` is first reinterpreted as its unsigned 32-bit
    representation; a negative shift count ``i`` shifts left instead.
    NOTE: the misspelled name ("shitf") is kept deliberately — other
    functions in this file call it by this name.
    """
    if n < 0:
        # reinterpret as unsigned 32-bit (same as ctypes.c_uint32(n).value)
        n = n & 0xFFFFFFFF
    return -int_overflow(n << -i) if i < 0 else int_overflow(n >> i)
def get_video_url_from_video_id(video_id):
    """Build a signed video-details URL for *video_id*.

    Ported from the site's JavaScript: appears to compute a CRC-32-style
    checksum (the constant -306674912 is the reflected CRC-32 polynomial
    0xEDB88320 as a signed int) over a randomized request path and append
    it as the ``s`` query parameter.
    """
    # from js
    # Build the 256-entry CRC lookup table.
    data = [""] * 256
    for index, _ in enumerate(data):
        t = index
        for i in range(8):
            t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1)
        data[index] = t

    def tmp():
        # Randomize the path on every attempt so the checksum changes too.
        rand_num = random.random()
        path = "/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}".format(video_id=video_id,
                                                                              random_num=str(rand_num)[2:])
        e = o = r = -1
        i, a = 0, len(path)
        # UTF-8-encode each character of the path and feed its bytes into the CRC.
        while i < a:
            e = ord(path[i])
            i += 1
            if e < 128:
                # single-byte (ASCII) character
                r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)]
            else:
                if e < 2048:
                    # two-byte UTF-8 sequence
                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))]
                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
                else:
                    if 55296 <= e < 57344:
                        # surrogate pair -> four-byte UTF-8 sequence
                        e = (1023 & e) + 64
                        i += 1
                        # NOTE(review): `t.url(i)` looks like a mistranslated JS port —
                        # `t` is an int at this point, so this line would raise
                        # AttributeError. The path is ASCII in practice, so this
                        # branch should never run; confirm before relying on it.
                        o = 1023 & t.url(i)
                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))]
                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))]
                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))]
                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))]
                    else:
                        # three-byte UTF-8 sequence
                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))]
                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))]
                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
        return "https://ib.365yg.com{path}&s={param}".format(path=path, param=unsigned_right_shitf(r ^ -1, 0))

    # Retry with a fresh random path until the checksum is non-negative.
    while 1:
        url = tmp()
        if url.split("=")[-1][0] != "-":  # the `s` parameter must not be negative
            return url
def ixigua_download(url, output_dir='.', merge=True, info_only=False, stream_id='', **kwargs):
    """Download a single video from ixigua.com.

    url        -- video page URL, e.g. https://www.ixigua.com/i6631065141750268420/
    output_dir -- directory to save the downloaded file(s) into
    merge      -- passed through to download_urls (merge segments)
    info_only  -- if True, only print stream information, download nothing
    stream_id  -- optional 'definition' (e.g. '720p') to restrict to one stream
    """
    # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
    # Pre-baked cookie: the site refuses requests without these session markers.
    headers['cookie'] = "MONITOR_WEB_ID=7892c49b-296e-4499-8704-e47c1b15123; " \
                        "ixigua-a-s=1; ttcid=af99669b6304453480454f1507011d5c234; BD_REF=1; " \
                        "__ac_nonce=060d88ff000a75e8d17eb; __ac_signature=_02B4Z6wo100f01kX9ZpgAAIDAKIBBQUIPYT5F2WIAAPG2ad; " \
                        "ttwid=1%7CcIsVF_3vqSIk4XErhPB0H2VaTxT0tdsTMRbMjrJOPN8%7C1624806049%7C08ce7dd6f7d20506a41ba0a331ef96a6505d96731e6ad9f6c8c709f53f227ab1; "

    resp = urlopen_with_retry(request.Request(url, headers=headers))
    html = resp.read().decode('utf-8')

    # Append the cookies the server set for this session to our cookie header.
    _cookies = []
    for c in resp.getheader('Set-Cookie').split("httponly,"):
        _cookies.append(c.strip().split(' ')[0])
    headers['cookie'] += ' '.join(_cookies)

    # All video metadata lives in an embedded SSR hydration JSON blob.
    match_txt = match1(html, r"<script id=\"SSR_HYDRATED_DATA\">window._SSR_HYDRATED_DATA=(.*?)<\/script>")
    if not match_txt:
        log.e("Get video info from url failed, url: {}".format(url))
        return

    # The blob uses the JS literal `undefined`, which JSON can't parse.
    video_info = loads(match_txt.replace('":undefined', '":null'))
    if not video_info:
        log.e("video_info not found, url:{}".format(url))
        return

    title = video_info['anyVideo']['gidInformation']['packerData']['video']['title']
    video_resource = video_info['anyVideo']['gidInformation']['packerData']['video']['videoResource']
    if video_resource.get('dash', None):
        video_list = video_resource['dash']
    elif video_resource.get('dash_120fps', None):
        video_list = video_resource['dash_120fps']
    elif video_resource.get('normal', None):
        video_list = video_resource['normal']
    else:
        log.e("video_list not found, url:{}".format(url))
        return

    streams = [
        # {'file_id': 'fc1b9bf8e8e04a849d90a5172d3f6919', 'quality': "normal", 'size': 0,
        # 'definition': '720p', 'video_url': '','audio_url':'','v_type':'dash'},
    ]
    # Prefer the watermark-free separate video+audio pair; fall back to the
    # plain (watermarked) mp4 list when it is absent.
    if video_list.get('dynamic_video', None):
        audio_url = base64.b64decode(
            video_list['dynamic_video']['dynamic_audio_list'][0]['main_url'].encode("utf-8")).decode("utf-8")
        dynamic_video_list = video_list['dynamic_video']['dynamic_video_list']
        streams = convertStreams(dynamic_video_list, audio_url)
    elif video_list.get('video_list', None):
        dynamic_video_list = video_list['video_list']
        streams = convertStreams(dynamic_video_list, "")

    print("title: %s" % title)
    for stream in streams:
        # Skip streams that don't match the explicitly requested definition.
        if stream_id != "" and stream_id != stream['definition']:
            continue

        print(" - format: %s" % stream['definition'])
        print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
        print(" quality: %s " % stream['quality'])
        print(" v_type: %s " % stream['v_type'])
        # print(" video_url: %s " % stream['video_url'])
        # print(" audio_url: %s " % stream['audio_url'])
        print()

        # Unless only info was requested, download the first matching stream.
        if not info_only:
            urls = [stream['video_url']]
            if stream['audio_url'] != "":
                urls.append(stream['audio_url'])
                kwargs['av'] = 'av'  # tells the downloader to merge audio and video
            download_urls(urls, title, "mp4", stream['size'], output_dir, merge=merge, headers=headers,
                          **kwargs)
            return
def convertStreams(video_list, audio_url):
    """Normalize raw site video entries into this module's stream dicts.

    video_list -- list of raw entries (keys: file_hash, quality, size,
                  definition, main_url (base64-encoded), vtype)
    audio_url  -- decoded audio URL shared by every stream ("" when the
                  stream already contains audio)
    Returns a list of dicts with keys file_id/quality/size/definition/
    video_url/audio_url/v_type.
    """
    return [
        {
            'file_id': entry['file_hash'],
            'quality': entry['quality'],
            'size': entry['size'],
            'definition': entry['definition'],
            # main_url arrives base64-encoded; decode it to a plain URL
            'video_url': base64.b64decode(entry['main_url'].encode("utf-8")).decode("utf-8"),
            'audio_url': audio_url,
            'v_type': entry['vtype'],
        }
        for entry in video_list
    ]
def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs): def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):