mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 21:45:02 +03:00
Update douyin.py
The site douyin.com changed to a more user-friendly website; this commit updates the extractor accordingly.
This commit is contained in:
parent
3881ed3f94
commit
5445f5ecde
@ -1,7 +1,7 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
from urllib.parse import unquote
|
||||||
|
|
||||||
from ..common import (
|
from ..common import (
|
||||||
url_size,
|
url_size,
|
||||||
@ -18,17 +18,17 @@ __all__ = ['douyin_download_by_url']
|
|||||||
|
|
||||||
def douyin_download_by_url(url, **kwargs):
|
def douyin_download_by_url(url, **kwargs):
|
||||||
page_content = get_content(url, headers=fake_headers)
|
page_content = get_content(url, headers=fake_headers)
|
||||||
match_rule = re.compile(r'var data = \[(.*?)\];')
|
# The easiest way to get the title is, obviously, from <title>
|
||||||
video_info = json.loads(match_rule.findall(page_content)[0])
|
title = re.findall(r'<title.*>(.*)</title>', page_content)[0].strip()
|
||||||
video_url = video_info['video']['play_addr']['url_list'][0]
|
# Remove the site name from title
|
||||||
# fix: https://www.douyin.com/share/video/6553248251821165832
|
site_name = ' - 抖音'
|
||||||
# if there is no title, use desc
|
if title.endswith(site_name):
|
||||||
cha_list = video_info['cha_list']
|
title = title[:-len(site_name)]
|
||||||
if cha_list:
|
|
||||||
title = cha_list[0]['cha_name']
|
|
||||||
else:
|
|
||||||
title = video_info['desc']
|
|
||||||
video_format = 'mp4'
|
video_format = 'mp4'
|
||||||
|
# The video url is url escaped, as of today, there are 4 working CDN video
|
||||||
|
# urls for the same video, I chose the shortest one.
|
||||||
|
cdn_pattern = r'(api\.amemv\.com.*PackSourceEnum_AWEME_DETAIL)'
|
||||||
|
video_url = 'https://' + unquote(re.findall(cdn_pattern, page_content)[0])
|
||||||
size = url_size(video_url, faker=True)
|
size = url_size(video_url, faker=True)
|
||||||
print_info(
|
print_info(
|
||||||
site_info='douyin.com', title=title,
|
site_info='douyin.com', title=title,
|
||||||
|
Loading…
Reference in New Issue
Block a user