mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 21:45:02 +03:00
[tiktok] fix extraction
This commit is contained in:
parent
db6ed38c6a
commit
1b567d0830
@ -5,42 +5,36 @@ __all__ = ['tiktok_download']
|
|||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
referUrl = url.split('?')[0]
|
while True:
|
||||||
headers = fake_headers
|
m = re.match('https://([^/]+)(/.*)', url)
|
||||||
|
host = m.group(1)
|
||||||
|
if host == 'www.tiktok.com': # canonical URL reached
|
||||||
|
url = m.group(2).split('?')[0]
|
||||||
|
vid = url.split('/')[3] # should be a string of numbers
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
url = get_location(url)
|
||||||
|
|
||||||
# trick or treat
|
headers = {
|
||||||
html = get_content(url, headers=headers)
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||||
data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
'Accept': '*/*',
|
||||||
|
'Connection': 'keep-alive' # important
|
||||||
|
}
|
||||||
|
|
||||||
|
html = getHttps(host, url, headers=headers)
|
||||||
|
data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html)
|
||||||
info = json.loads(data)
|
info = json.loads(data)
|
||||||
wid = info['props']['initialProps']['$wid']
|
downloadAddr = info['ItemModule'][vid]['video']['downloadAddr']
|
||||||
cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
|
author = info['ItemModule'][vid]['author'] # same as uniqueId
|
||||||
|
nickname = info['UserModule']['users'][author]['nickname']
|
||||||
|
title = '%s [%s]' % (nickname or author, vid)
|
||||||
|
|
||||||
# here's the cookie
|
mime, ext, size = url_info(downloadAddr, headers=headers)
|
||||||
headers['Cookie'] = cookie
|
|
||||||
|
|
||||||
# try again
|
|
||||||
html = get_content(url, headers=headers)
|
|
||||||
data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
|
|
||||||
info = json.loads(data)
|
|
||||||
wid = info['props']['initialProps']['$wid']
|
|
||||||
cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
|
|
||||||
|
|
||||||
videoData = info['props']['pageProps']['itemInfo']['itemStruct']
|
|
||||||
videoId = videoData['id']
|
|
||||||
videoUrl = videoData['video']['downloadAddr']
|
|
||||||
uniqueId = videoData['author'].get('uniqueId')
|
|
||||||
nickName = videoData['author'].get('nickname')
|
|
||||||
|
|
||||||
title = '%s [%s]' % (nickName or uniqueId, videoId)
|
|
||||||
|
|
||||||
# we also need the referer
|
|
||||||
headers['Referer'] = referUrl
|
|
||||||
|
|
||||||
mime, ext, size = url_info(videoUrl, headers=headers)
|
|
||||||
|
|
||||||
print_info(site_info, title, mime, size)
|
print_info(site_info, title, mime, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls([videoUrl], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
|
download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
|
||||||
|
|
||||||
site_info = "TikTok.com"
|
site_info = "TikTok.com"
|
||||||
download = tiktok_download
|
download = tiktok_download
|
||||||
|
Loading…
Reference in New Issue
Block a user