From 84db11759e05e4b6ee525806743a2f1b1aae4f90 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 11 Dec 2022 17:08:34 +0100 Subject: [PATCH] [tiktok] fix extraction --- src/you_get/common.py | 2 +- src/you_get/extractors/tiktok.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index c5c19d01..1558baf6 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -358,7 +358,7 @@ def getHttps(host, url, headers, gzip=True, deflate=False, debuglevel=0): if deflate: data = undeflate(data) - return str(data, encoding='utf-8') + return str(data, encoding='utf-8'), resp.getheader('set-cookie') # DEPRECATED in favor of get_content() diff --git a/src/you_get/extractors/tiktok.py b/src/you_get/extractors/tiktok.py index b5a6d4bf..641e5e97 100644 --- a/src/you_get/extractors/tiktok.py +++ b/src/you_get/extractors/tiktok.py @@ -9,6 +9,7 @@ def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', + 'Referer': 'https://www.tiktok.com/', 'Connection': 'keep-alive' # important } @@ -22,7 +23,9 @@ def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): url = m.group(3).split('?')[0] vid = url.split('/')[3] # should be a string of numbers - html = getHttps(host, url, headers=headers) + html, set_cookie = getHttps(host, url, headers=headers) + tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie) + headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \ r1(r'', html)