[tiktok] fix extraction for alternative URLs

This commit is contained in:
Mort Yao 2022-12-11 17:43:07 +01:00
parent 84db11759e
commit 7b845b34ce
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251
2 changed files with 15 additions and 12 deletions

View File

@@ -344,21 +344,24 @@ def undeflate(data):
# an http.client implementation of get_content()
# because urllib does not support "Connection: keep-alive"
def getHttps(host, url, headers, gzip=True, deflate=False, debuglevel=0):
def getHttps(host, url, headers, debuglevel=0):
import http.client
conn = http.client.HTTPSConnection(host)
conn.set_debuglevel(debuglevel)
conn.request("GET", url, headers=headers)
resp = conn.getresponse()
set_cookie = resp.getheader('set-cookie')
data = resp.read()
if gzip:
data = ungzip(data)
if deflate:
data = undeflate(data)
try:
data = ungzip(data) # gzip
data = undeflate(data) # deflate
except:
pass
return str(data, encoding='utf-8'), resp.getheader('set-cookie')
conn.close()
return str(data, encoding='utf-8'), set_cookie
# DEPRECATED in favor of get_content()

View File

@@ -16,10 +16,10 @@ def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
m = re.match('(https?://)?([^/]+)(/.*)', url)
host = m.group(2)
if host != 'www.tiktok.com': # non-canonical URL
url = get_location(url, headers=headers)
m = re.match('(https?://)?([^/]+)(/.*)', url)
host = m.group(2)
vid = r1(r'/video/(\d+)', url)
url = 'https://www.tiktok.com/@/video/%s/' % vid
host = 'www.tiktok.com'
else:
url = m.group(3).split('?')[0]
vid = url.split('/')[3] # should be a string of numbers