you-get/src/you_get/extractors/tiktok.py

43 lines
1.5 KiB
Python
Raw Normal View History

2018-11-30 20:29:22 +03:00
#!/usr/bin/env python
__all__ = ['tiktok_download']
from ..common import *
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download a single TikTok video, or only print its metadata.

    The given URL is followed through redirects until it lands on the
    canonical ``www.tiktok.com`` host. The video page is then fetched, the
    embedded ``SIGI_STATE`` JSON blob is parsed, and the video's
    ``downloadAddr`` is handed to the generic downloader.

    Args:
        url: An ``https://`` TikTok URL (canonical or a redirecting one).
        output_dir: Directory passed through to ``download_urls``.
        merge: Merge flag passed through to ``download_urls``.
        info_only: When true, print the video info and skip the download.
        **kwargs: Accepted for interface compatibility; not used here.

    Raises:
        ValueError: If the URL is not a supported ``https://`` URL, the
            redirect chain does not reach ``www.tiktok.com``, the path does
            not contain a video id, or the page has no ``SIGI_STATE`` data.
        KeyError: If the parsed page data has no entry for the video id.
    """
    max_redirects = 10  # guard against redirect cycles / non-progressing hops
    redirects = 0
    while True:
        m = re.match('https://([^/]+)(/.*)', url)
        if m is None:
            raise ValueError('tiktok: unsupported URL: %r' % (url,))
        host = m.group(1)
        if host == 'www.tiktok.com':  # canonical URL reached
            path = m.group(2).split('?')[0]
            break
        if redirects >= max_redirects:
            raise ValueError('tiktok: too many redirects resolving %r' % (url,))
        location = get_location(url)
        if not location or location == url:
            # No progress was made; looping again would never terminate.
            raise ValueError('tiktok: %r does not redirect to www.tiktok.com' % (url,))
        url = location
        redirects += 1

    # Expected path shape: /@<user>/video/<vid>
    parts = path.split('/')
    if len(parts) < 4 or not parts[3]:
        raise ValueError('tiktok: cannot find a video id in path %r' % (path,))
    vid = parts[3]  # should be a string of numbers

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive'  # important
    }

    html = getHttps(host, path, headers=headers)

    # The page state is embedded either as an inline JS assignment or as a
    # JSON <script> tag; try both layouts.
    data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \
        r1(r'<script id="SIGI_STATE" type="application/json">(.*?)</script>', html)
    if not data:
        raise ValueError('tiktok: SIGI_STATE data not found in page for video %s' % vid)
    info = json.loads(data)

    item = info['ItemModule'][vid]
    downloadAddr = item['video']['downloadAddr']
    author = item['author']  # same as uniqueId
    # The nickname is optional for the title, so a missing user record must
    # fall back to the author id instead of aborting the download.
    users = (info.get('UserModule') or {}).get('users') or {}
    nickname = (users.get(author) or {}).get('nickname')
    title = '%s [%s]' % (nickname or author, vid)

    mime, ext, size = url_info(downloadAddr, headers=headers)

    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)

2018-11-30 20:29:22 +03:00
# Human-readable site name; tiktok_download passes it to print_info.
site_info = "TikTok.com"
# Module-level alias for the single-video downloader.
# NOTE(review): presumably the name the extractor dispatcher looks up - confirm against the caller.
download = tiktok_download
# Playlist entry point produced by the project helper playlist_not_supported.
# NOTE(review): presumably reports that playlists are unsupported for this site - confirm in common.
download_playlist = playlist_not_supported('tiktok')