mirror of
https://github.com/soimort/you-get.git
synced 2025-02-02 16:24:00 +03:00
[tiktok] fix extraction for alternative URLs
This commit is contained in:
parent
c768b29153
commit
e2ba3ecdb3
@ -5,16 +5,6 @@ __all__ = ['tiktok_download']
|
|||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
while True:
|
|
||||||
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
|
||||||
host = m.group(2)
|
|
||||||
if host == 'www.tiktok.com': # canonical URL reached
|
|
||||||
url = m.group(3).split('?')[0]
|
|
||||||
vid = url.split('/')[3] # should be a string of numbers
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
url = get_location(url)
|
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||||
'Accept-Encoding': 'gzip, deflate',
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
@ -22,7 +12,20 @@ def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
'Connection': 'keep-alive' # important
|
'Connection': 'keep-alive' # important
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
||||||
|
host = m.group(2)
|
||||||
|
if host != 'www.tiktok.com': # non-canonical URL
|
||||||
|
html = getHttps(host, url, headers=headers, gzip=False)
|
||||||
|
url = r1(r'(https://www.tiktok.com/[^?"]+)', html)
|
||||||
|
# use canonical URL
|
||||||
|
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
||||||
|
host = m.group(2)
|
||||||
|
|
||||||
|
url = m.group(3).split('?')[0]
|
||||||
|
vid = url.split('/')[3] # should be a string of numbers
|
||||||
|
|
||||||
html = getHttps(host, url, headers=headers)
|
html = getHttps(host, url, headers=headers)
|
||||||
|
|
||||||
data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \
|
data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \
|
||||||
r1(r'<script id="SIGI_STATE" type="application/json">(.*?)</script>', html)
|
r1(r'<script id="SIGI_STATE" type="application/json">(.*?)</script>', html)
|
||||||
info = json.loads(data)
|
info = json.loads(data)
|
||||||
|
Loading…
Reference in New Issue
Block a user