[tiktok] fix extraction

2025-01-23 21:45:02 +03:00 · 2022-04-19 15:53:33 +02:00 · 2022-04-19 15:53:33 +02:00 · 1b567d0830
commit 1b567d0830
parent db6ed38c6a
1 changed files with 24 additions and 30 deletions
--- a/src/you_get/extractors/tiktok.py
+++ b/src/you_get/extractors/tiktok.py
@@ -5,42 +5,36 @@ __all__ = ['tiktok_download']
 from ..common import *
 def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    referUrl = url.split('?')[0]
+    while True:
-    headers = fake_headers
+        m = re.match('https://([^/]+)(/.*)', url)
        host = m.group(1)
        if host == 'www.tiktok.com':  # canonical URL reached
            url = m.group(2).split('?')[0]
            vid = url.split('/')[3]  # should be a string of numbers
            break
        else:
            url = get_location(url)
-    # trick or treat
+    headers = {
-    html = get_content(url, headers=headers)
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
-    data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
+        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive'  # important
    }
    html = getHttps(host, url, headers=headers)
    data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html)
    info = json.loads(data)
-    wid = info['props']['initialProps']['$wid']
+    downloadAddr = info['ItemModule'][vid]['video']['downloadAddr']
-    cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
+    author = info['ItemModule'][vid]['author']  # same as uniqueId
    nickname = info['UserModule']['users'][author]['nickname']
    title = '%s [%s]' % (nickname or author, vid)
-    # here's the cookie
+    mime, ext, size = url_info(downloadAddr, headers=headers)
    headers['Cookie'] = cookie
    # try again
    html = get_content(url, headers=headers)
    data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
    info = json.loads(data)
    wid = info['props']['initialProps']['$wid']
    cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
    videoData = info['props']['pageProps']['itemInfo']['itemStruct']
    videoId = videoData['id']
    videoUrl = videoData['video']['downloadAddr']
    uniqueId = videoData['author'].get('uniqueId')
    nickName = videoData['author'].get('nickname')
    title = '%s [%s]' % (nickName or uniqueId, videoId)
    # we also need the referer
    headers['Referer'] = referUrl
    mime, ext, size = url_info(videoUrl, headers=headers)
    print_info(site_info, title, mime, size)
    if not info_only:
-        download_urls([videoUrl], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
+        download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
 site_info = "TikTok.com"
 download = tiktok_download