mirror of
https://github.com/soimort/you-get.git
synced 2025-02-02 16:24:00 +03:00
[tiktok] fix extraction for alternative URLs
This commit is contained in:
parent
84db11759e
commit
7b845b34ce
@@ -344,21 +344,24 @@ def undeflate(data):
|
|||||||
|
|
||||||
# an http.client implementation of get_content()
|
# an http.client implementation of get_content()
|
||||||
# because urllib does not support "Connection: keep-alive"
|
# because urllib does not support "Connection: keep-alive"
|
||||||
def getHttps(host, url, headers, gzip=True, deflate=False, debuglevel=0):
|
def getHttps(host, url, headers, debuglevel=0):
|
||||||
import http.client
|
import http.client
|
||||||
|
|
||||||
conn = http.client.HTTPSConnection(host)
|
conn = http.client.HTTPSConnection(host)
|
||||||
conn.set_debuglevel(debuglevel)
|
conn.set_debuglevel(debuglevel)
|
||||||
conn.request("GET", url, headers=headers)
|
conn.request("GET", url, headers=headers)
|
||||||
resp = conn.getresponse()
|
resp = conn.getresponse()
|
||||||
|
set_cookie = resp.getheader('set-cookie')
|
||||||
|
|
||||||
data = resp.read()
|
data = resp.read()
|
||||||
if gzip:
|
try:
|
||||||
data = ungzip(data)
|
data = ungzip(data) # gzip
|
||||||
if deflate:
|
data = undeflate(data) # deflate
|
||||||
data = undeflate(data)
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
return str(data, encoding='utf-8'), resp.getheader('set-cookie')
|
conn.close()
|
||||||
|
return str(data, encoding='utf-8'), set_cookie
|
||||||
|
|
||||||
|
|
||||||
# DEPRECATED in favor of get_content()
|
# DEPRECATED in favor of get_content()
|
||||||
|
@@ -16,12 +16,12 @@ def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
||||||
host = m.group(2)
|
host = m.group(2)
|
||||||
if host != 'www.tiktok.com': # non-canonical URL
|
if host != 'www.tiktok.com': # non-canonical URL
|
||||||
url = get_location(url, headers=headers)
|
vid = r1(r'/video/(\d+)', url)
|
||||||
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
url = 'https://www.tiktok.com/@/video/%s/' % vid
|
||||||
host = m.group(2)
|
host = 'www.tiktok.com'
|
||||||
|
else:
|
||||||
url = m.group(3).split('?')[0]
|
url = m.group(3).split('?')[0]
|
||||||
vid = url.split('/')[3] # should be a string of numbers
|
vid = url.split('/')[3] # should be a string of numbers
|
||||||
|
|
||||||
html, set_cookie = getHttps(host, url, headers=headers)
|
html, set_cookie = getHttps(host, url, headers=headers)
|
||||||
tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie)
|
tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie)
|
||||||
|
Loading…
Reference in New Issue
Block a user