mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 21:45:02 +03:00
fix douyin extractor
This commit is contained in:
parent
82b376a0c6
commit
4119a1493e
@ -1,8 +1,6 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
import json
|
||||||
from urllib.parse import unquote
|
|
||||||
|
|
||||||
from ..common import (
|
from ..common import (
|
||||||
url_size,
|
url_size,
|
||||||
@ -11,25 +9,52 @@ from ..common import (
|
|||||||
fake_headers,
|
fake_headers,
|
||||||
download_urls,
|
download_urls,
|
||||||
playlist_not_supported,
|
playlist_not_supported,
|
||||||
|
match1,
|
||||||
|
get_location,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['douyin_download_by_url']
|
__all__ = ['douyin_download_by_url']
|
||||||
|
|
||||||
|
|
||||||
|
def get_value(source: dict, path):
    """Follow *path* through nested containers and return the value found.

    Each element of *path* is applied in order as a subscript: a ``str``
    is used as a mapping key, an ``int`` as a sequence index (or as an
    integer mapping key).

    Args:
        source: The outermost container, typically a dict decoded from JSON.
        path: An iterable of ``str`` / ``int`` path components.

    Returns:
        The value reached after applying every component, or ``None`` if
        any step cannot be followed (missing key, index out of range,
        non-subscriptable intermediate value, non-iterable *path*, or a
        component that is neither ``str`` nor ``int``). An empty *path*
        returns *source* itself.
    """
    value = source
    try:
        for key in path:
            if not isinstance(key, (str, int)):
                # An unsupported component can never be resolved; treat it
                # as a failed lookup instead of silently skipping it.
                return None
            value = value[key]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are treated as "not found"; anything else
        # (e.g. KeyboardInterrupt) is allowed to propagate.
        return None
    return value
|
||||||
|
|
||||||
|
|
||||||
def douyin_download_by_url(url, **kwargs):
|
def douyin_download_by_url(url, **kwargs):
|
||||||
|
# if short link, get the real url
|
||||||
|
if 'v.douyin.com' in url:
|
||||||
|
url = get_location(url)
|
||||||
|
aweme_id = match1(url, r'/(\d+)/?')
|
||||||
|
# get video info
|
||||||
|
video_info_api = 'https://www.douyin.com/web/api/v2/aweme/iteminfo/?item_ids={}'
|
||||||
|
url = video_info_api.format(aweme_id)
|
||||||
page_content = get_content(url, headers=fake_headers)
|
page_content = get_content(url, headers=fake_headers)
|
||||||
# The video player and video source are rendered client-side, the data
|
video_info = json.loads(page_content)
|
||||||
# contains in a <script id="RENDER_DATA" type="application/json"> tag
|
|
||||||
# quoted, unquote the whole page content then search using regex with
|
# get video id and title
|
||||||
# regular string.
|
video_id = get_value(video_info, ['item_list', 0, 'video', 'vid'])
|
||||||
page_content = unquote(page_content)
|
title = get_value(video_info, ['item_list', 0, 'desc'])
|
||||||
title = re.findall(r'"desc":"([^"]*)"', page_content)[0].strip()
|
|
||||||
|
# get video play url
|
||||||
|
video_url = "https://aweme.snssdk.com/aweme/v1/playwm/?ratio=720p&line=0&video_id={}".format(video_id)
|
||||||
video_format = 'mp4'
|
video_format = 'mp4'
|
||||||
# video URLs are in this pattern {"src":"THE_URL"}, in json format
|
|
||||||
urls_pattern = r'"playAddr":(\[.*?\])'
|
|
||||||
urls = json.loads(re.findall(urls_pattern, page_content)[0])
|
|
||||||
video_url = 'https:' + urls[0]['src']
|
|
||||||
size = url_size(video_url, faker=True)
|
size = url_size(video_url, faker=True)
|
||||||
print_info(
|
print_info(
|
||||||
site_info='douyin.com', title=title,
|
site_info='douyin.com', title=title,
|
||||||
|
Loading…
Reference in New Issue
Block a user