you-get/src/you_get/extractors/douyin.py

# coding=utf-8

import re
from urllib.parse import unquote

from ..common import (
    url_size,
    print_info,
    get_content,
    fake_headers,
    download_urls,
    playlist_not_supported,
)


# Names exported by ``from <this module> import *``.
__all__ = ['douyin_download_by_url']


def douyin_download_by_url(url, **kwargs):
    """Print information about a douyin video and optionally download it.

    The page at ``url`` is fetched with ``fake_headers``.  The title is
    taken from the page's ``<title>`` element (with the trailing
    `` - 抖音`` site name removed) and the video address from the first
    match of the ``api.amemv.com`` CDN pattern in the page source.

    Args:
        url: Address of the douyin video page.
        **kwargs: Forwarded unchanged to ``download_urls``.  If
            ``info_only`` is truthy, only the video information is
            printed; a missing ``info_only`` is treated as false.

    Raises:
        IndexError: If the page contains no ``<title>`` element or no
            link matching the CDN pattern.
    """
    page_content = get_content(url, headers=fake_headers)
    # The easiest way to get the title is from <title>.  ``[^>]*`` keeps the
    # opening tag from running past its own ``>`` and the lazy group stops
    # at the first ``</title>``; a fully greedy pattern truncates titles
    # that contain ``>`` and, on single-line pages, can latch onto a later
    # ``</title>`` instead of the page title.
    title = re.findall(r'<title[^>]*>(.*?)</title>', page_content)[0].strip()
    # Remove the site name from title
    site_name = ' - 抖音'
    if title.endswith(site_name):
        title = title[:-len(site_name)]
    video_format = 'mp4'
    # The video url is url escaped, as of today, there are 4 working CDN video
    # urls for the same video, I chose the shortest one.
    cdn_pattern = r'(api\.amemv\.com.*PackSourceEnum_AWEME_DETAIL)'
    video_url = 'https://' + unquote(re.findall(cdn_pattern, page_content)[0])
    size = url_size(video_url, faker=True)
    print_info(
        site_info='douyin.com', title=title,
        type=video_format, size=size
    )
    # ``.get`` so that a direct call without ``info_only`` downloads instead
    # of failing with KeyError.
    if not kwargs.get('info_only'):
        download_urls(
            urls=[video_url], title=title, ext=video_format, total_size=size,
            faker=True,
            **kwargs
        )


# Module-level aliases for the single-video downloader and the playlist
# handler.
# NOTE(review): presumably these are the names the you-get core looks up on
# an extractor module -- confirm against the caller.
download = douyin_download_by_url
# Built by the shared ``playlist_not_supported`` helper from ``..common``,
# called with the site name 'douyin'.
download_playlist = playlist_not_supported('douyin')
✨ add douyin support 2017-12-15 12:21:34 +03:00			`# coding=utf-8`

			`import re`
Update douyin.py The site douyin.com changed to a more user friendly website, this is the updated extractor for that. 2021-06-23 00:51:17 +03:00			`from urllib.parse import unquote`
✨ add douyin support 2017-12-15 12:21:34 +03:00
			`from ..common import (`
			`url_size,`
			`print_info,`
			`get_content,`
[douyin] send the request without fake headers, the douyin website will return fake body or the 403 response! 2018-05-11 12:12:01 +03:00			`fake_headers,`
✨ add douyin support 2017-12-15 12:21:34 +03:00			`download_urls,`
			`playlist_not_supported,`
			`)`


			`__all__ = ['douyin_download_by_url']`


			`def douyin_download_by_url(url, **kwargs):`
[douyin] send the request without fake headers, the douyin website will return fake body or the 403 response! 2018-05-11 12:12:01 +03:00			`page_content = get_content(url, headers=fake_headers)`
Update douyin.py The site douyin.com changed to a more user friendly website, this is the updated extractor for that. 2021-06-23 00:51:17 +03:00			`# The easiest way to get the title is, obviously, from <title>`
			`title = re.findall(r'<title.*>(.*)</title>', page_content)[0].strip()`
			`# Remove the site name from title`
			`site_name = ' - 抖音'`
			`if title.endswith(site_name):`
			`title = title[:-len(site_name)]`
✨ add douyin support 2017-12-15 12:21:34 +03:00			`video_format = 'mp4'`
Update douyin.py The site douyin.com changed to a more user friendly website, this is the updated extractor for that. 2021-06-23 00:51:17 +03:00			`# The video url is url escaped, as of today, there are 4 working CDN video`
			`# urls for the same video, I chose the shortest one.`
			`cdn_pattern = r'(api\.amemv\.com.*PackSourceEnum_AWEME_DETAIL)'`
			`video_url = 'https://' + unquote(re.findall(cdn_pattern, page_content)[0])`
[douyin] send the request without fake headers, the douyin website will return fake body or the 403 response! 2018-05-11 12:12:01 +03:00			`size = url_size(video_url, faker=True)`
✨ add douyin support 2017-12-15 12:21:34 +03:00			`print_info(`
			`site_info='douyin.com', title=title,`
			`type=video_format, size=size`
			`)`
			`if not kwargs['info_only']:`
			`download_urls(`
			`urls=[video_url], title=title, ext=video_format, total_size=size,`
[douyin] send the request without fake headers, the douyin website will return fake body or the 403 response! 2018-05-11 12:12:01 +03:00			`faker=True,`
✨ add douyin support 2017-12-15 12:21:34 +03:00			`**kwargs`
			`)`


			`download = douyin_download_by_url`
			`download_playlist = playlist_not_supported('douyin')`