[tiktok] fix extraction

This commit is contained in:
Mort Yao 2022-12-11 17:08:34 +01:00
parent e674bfbc2b
commit 84db11759e
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251
2 changed files with 5 additions and 2 deletions

View File

@@ -358,7 +358,7 @@ def getHttps(host, url, headers, gzip=True, deflate=False, debuglevel=0):
if deflate:
data = undeflate(data)
return str(data, encoding='utf-8')
return str(data, encoding='utf-8'), resp.getheader('set-cookie')
# DEPRECATED in favor of get_content()

View File

@@ -9,6 +9,7 @@ def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
'Accept-Encoding': 'gzip, deflate',
'Accept': '*/*',
'Referer': 'https://www.tiktok.com/',
'Connection': 'keep-alive' # important
}
@@ -22,7 +23,9 @@ def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
url = m.group(3).split('?')[0]
vid = url.split('/')[3] # should be a string of numbers
html = getHttps(host, url, headers=headers)
html, set_cookie = getHttps(host, url, headers=headers)
tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie)
headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token
data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \
r1(r'<script id="SIGI_STATE" type="application/json">(.*?)</script>', html)