2017-12-15 12:21:34 +03:00
|
|
|
# coding=utf-8
|
|
|
|
|
|
|
|
import re
|
2021-06-23 00:51:17 +03:00
|
|
|
from urllib.parse import unquote
|
2017-12-15 12:21:34 +03:00
|
|
|
|
|
|
|
from ..common import (
|
|
|
|
url_size,
|
|
|
|
print_info,
|
|
|
|
get_content,
|
2018-05-11 12:12:01 +03:00
|
|
|
fake_headers,
|
2017-12-15 12:21:34 +03:00
|
|
|
download_urls,
|
|
|
|
playlist_not_supported,
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Names exported by ``from <this module> import *``: only the URL-based
# downloader is listed.
__all__ = ['douyin_download_by_url']
|
|
|
|
|
|
|
|
|
|
|
|
def douyin_download_by_url(url, **kwargs):
    """Print information about the Douyin video at *url* and optionally download it.

    The page is fetched with ``fake_headers``. The title is read from the
    page's ``<title>`` element (with the trailing site name removed) and the
    video address is recovered from a URL-escaped CDN link embedded in the
    page source.

    Args:
        url: Address of the Douyin video page.
        **kwargs: Options forwarded unchanged to ``download_urls``. A truthy
            ``info_only`` entry suppresses the download; a missing entry is
            treated as false.

    Raises:
        IndexError: If the page has no ``<title>`` element or no matching
            CDN video link.
    """
    page_content = get_content(url, headers=fake_headers)

    # The easiest way to get the title is from <title>. Both parts of the
    # pattern are kept tight ([^>]* and a non-greedy group) so that, on a
    # page served as one long line, the match cannot run past the first
    # closing tag or swallow a '>' that is part of the title text.
    title_match = re.search(r'<title[^>]*>(.*?)</title>', page_content)
    if title_match is None:
        raise IndexError('douyin: no <title> element found in page')
    title = title_match.group(1).strip()

    # Remove the site name from the title.
    site_name = ' - 抖音'
    if title.endswith(site_name):
        title = title[:-len(site_name)]

    video_format = 'mp4'

    # The video URL is URL-escaped inside the page. The original author noted
    # that several working CDN URLs exist for the same video and picked the
    # api.amemv.com one. The match is non-greedy so it ends at the first
    # terminator instead of spanning several links that share a line.
    cdn_pattern = r'(api\.amemv\.com.*?PackSourceEnum_AWEME_DETAIL)'
    cdn_match = re.search(cdn_pattern, page_content)
    if cdn_match is None:
        raise IndexError('douyin: no CDN video URL found in page')
    video_url = 'https://' + unquote(cdn_match.group(1))

    size = url_size(video_url, faker=True)
    print_info(
        site_info='douyin.com', title=title,
        type=video_format, size=size
    )

    # .get() so that callers which omit info_only get a download instead of
    # a KeyError.
    if not kwargs.get('info_only'):
        download_urls(
            urls=[video_url], title=title, ext=video_format, total_size=size,
            faker=True,
            **kwargs
        )
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level alias for the single-video downloader.
# NOTE(review): presumably the name the extractor dispatcher looks up - confirm.
download = douyin_download_by_url
|
|
|
|
# Bound to whatever playlist_not_supported('douyin') returns.
# NOTE(review): presumably a handler reporting that playlists are unsupported
# for this site - confirm against ..common.
download_playlist = playlist_not_supported('douyin')
|