2012-08-31 19:20:38 +04:00
|
|
|
#!/usr/bin/env python
|
2012-08-20 19:54:03 +04:00
|
|
|
|
2012-08-31 19:20:38 +04:00
|
|
|
from ..common import *
|
2014-07-21 04:39:40 +04:00
|
|
|
from ..extractor import VideoExtractor
|
2012-08-20 19:54:03 +04:00
|
|
|
|
2015-10-18 03:03:16 +03:00
|
|
|
from xml.dom.minidom import parseString
|
|
|
|
|
2014-07-17 11:01:44 +04:00
|
|
|
class YouTube(VideoExtractor):
    """Extractor for YouTube videos and playlists."""

    name = "YouTube"

    # Non-DASH YouTube media encoding options, in descending quality order.
    # http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs. Retrieved July 17, 2014.
    #
    # Every entry carries the same eight string fields; an empty string means
    # the value is not given in the table this was taken from.
    # The 3D-profile itags (85, 102, 84, 101, 100, 82, 83) are deliberately
    # not listed.
    stream_types = [
        dict(itag='38', container='MP4', video_resolution='3072p',
             video_encoding='H.264', video_profile='High', video_bitrate='3.5-5',
             audio_encoding='AAC', audio_bitrate='192'),
        dict(itag='46', container='WebM', video_resolution='1080p',
             video_encoding='VP8', video_profile='', video_bitrate='',
             audio_encoding='Vorbis', audio_bitrate='192'),
        dict(itag='37', container='MP4', video_resolution='1080p',
             video_encoding='H.264', video_profile='High', video_bitrate='3-4.3',
             audio_encoding='AAC', audio_bitrate='192'),
        dict(itag='45', container='WebM', video_resolution='720p',
             video_encoding='VP8', video_profile='', video_bitrate='2',
             audio_encoding='Vorbis', audio_bitrate='192'),
        dict(itag='22', container='MP4', video_resolution='720p',
             video_encoding='H.264', video_profile='High', video_bitrate='2-3',
             audio_encoding='AAC', audio_bitrate='192'),
        # Live streaming only
        dict(itag='120', container='FLV', video_resolution='720p',
             video_encoding='H.264', video_profile='Main@L3.1', video_bitrate='2',
             audio_encoding='AAC', audio_bitrate='128'),
        dict(itag='44', container='WebM', video_resolution='480p',
             video_encoding='VP8', video_profile='', video_bitrate='1',
             audio_encoding='Vorbis', audio_bitrate='128'),
        dict(itag='35', container='FLV', video_resolution='480p',
             video_encoding='H.264', video_profile='Main', video_bitrate='0.8-1',
             audio_encoding='AAC', audio_bitrate='128'),
        dict(itag='43', container='WebM', video_resolution='360p',
             video_encoding='VP8', video_profile='', video_bitrate='0.5',
             audio_encoding='Vorbis', audio_bitrate='128'),
        dict(itag='34', container='FLV', video_resolution='360p',
             video_encoding='H.264', video_profile='Main', video_bitrate='0.5',
             audio_encoding='AAC', audio_bitrate='128'),
        dict(itag='18', container='MP4', video_resolution='360p',
             video_encoding='H.264', video_profile='Baseline', video_bitrate='0.5',
             audio_encoding='AAC', audio_bitrate='96'),
        dict(itag='6', container='FLV', video_resolution='270p',
             video_encoding='Sorenson H.263', video_profile='', video_bitrate='0.8',
             audio_encoding='MP3', audio_bitrate='64'),
        dict(itag='13', container='3GP', video_resolution='',
             video_encoding='MPEG-4 Visual', video_profile='', video_bitrate='0.5',
             audio_encoding='AAC', audio_bitrate=''),
        dict(itag='5', container='FLV', video_resolution='240p',
             video_encoding='Sorenson H.263', video_profile='', video_bitrate='0.25',
             audio_encoding='MP3', audio_bitrate='64'),
        dict(itag='36', container='3GP', video_resolution='240p',
             video_encoding='MPEG-4 Visual', video_profile='Simple', video_bitrate='0.175',
             audio_encoding='AAC', audio_bitrate='32'),
        dict(itag='17', container='3GP', video_resolution='144p',
             video_encoding='MPEG-4 Visual', video_profile='Simple', video_bitrate='0.05',
             audio_encoding='AAC', audio_bitrate='24'),
    ]
|
|
|
|
|
|
|
|
def decipher(js, s):
    """Decipher the scrambled signature ``s`` with the routines found in ``js``.

    The signature function and the helper functions it calls are located in
    the player script with regular expressions, translated from JavaScript
    into Python source by ``tr_js`` and executed on ``s``.

    Args:
        js: Source text of the HTML5 player script (``base.js``).
        s: The scrambled signature string.

    Returns:
        Whatever the translated signature function returns for ``s``.

    Raises:
        ValueError: If the signature function or one of its helpers cannot
            be located in ``js``.
    """
    # Examples:
    # - https://www.youtube.com/yts/jsbin/player-da_DK-vflWlK-zq/base.js
    # - https://www.youtube.com/yts/jsbin/player-vflvABTsY/da_DK/base.js
    # - https://www.youtube.com/yts/jsbin/player-vfls4aurX/da_DK/base.js
    # - https://www.youtube.com/yts/jsbin/player_ias-vfl_RGK2l/en_US/base.js
    # - https://www.youtube.com/yts/jsbin/player-vflRjqq_w/da_DK/base.js
    # - https://www.youtube.com/yts/jsbin/player_ias-vfl-jbnrr/da_DK/base.js

    def tr_js(code):
        """Rewrite the JavaScript subset used by the signature code as Python."""
        code = re.sub(r'function', r'def', code)
        # Function names that are Python keywords get a leading underscore.
        code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code)
        # '$' is legal in JavaScript identifiers but not in Python ones.
        code = re.sub(r'\$', '_dollar', code)
        code = re.sub(r'\{', r':\n\t', code)
        code = re.sub(r'\}', r'\n', code)
        code = re.sub(r'var\s+', r'', code)
        code = re.sub(r'(\w+)\.join\(""\)', r'"".join(\1)', code)
        code = re.sub(r'(\w+)\.length', r'len(\1)', code)
        code = re.sub(r'(\w+)\.slice\((\w+)\)', r'\1[\2:]', code)
        code = re.sub(r'(\w+)\.splice\((\w+),(\w+)\)', r'del \1[\2:\2+\3]', code)
        code = re.sub(r'(\w+)\.split\(""\)', r'list(\1)', code)
        return code

    js = js.replace('\n', ' ')

    # Name of the top-level signature function, tried against the call-site
    # shapes seen in the player scripts listed above.
    f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \
        match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \
        match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \
        match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
    if not f1:
        raise ValueError('decipher: signature function not found in player script')

    f1e = re.escape(f1)
    f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % f1e) or \
        match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % f1e)
    if not f1def:
        raise ValueError('decipher: definition of %r not found in player script' % f1)

    # Drop the object prefix from helper calls: "Xy.ab(a,3)" -> "ab(a,3)".
    f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
    f1def = 'function main_%s%s' % (f1, f1def)  # prefix to avoid potential namespace conflict
    code = tr_js(f1def)

    f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def))
    for f2 in f2s:
        f2e = re.escape(f2)  # escaped form is for the regex only
        f2def = re.search(r'[^$\w]%s:function\((\w+,\w+)\)(\{[^\{\}]+\})' % f2e, js)
        if f2def:
            params = f2def.group(1)
        else:
            # Helper declared with one parameter; it is still called with two.
            f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js)
            if f2def is None:
                raise ValueError('decipher: definition of helper %r not found in player script' % f2)
            params = '%s,b' % f2def.group(1)
        # Use the raw name here so that tr_js mangles the definition exactly
        # as it mangles the call sites inside the main function.
        code += tr_js('function {}({}){}'.format(f2, params, f2def.group(2)))

    # The call goes through tr_js too, so its name always matches the def.
    code += tr_js('_sig_output=main_%s(_sig_input)' % f1)

    # NOTE(security): this executes code derived from a downloaded script.
    # A dedicated namespace keeps the generated functions out of the module
    # globals and does not depend on locals() returning a live dict.
    namespace = {'_sig_input': s}
    exec(code, namespace)
    return namespace['_sig_output']
|
|
|
|
|
2018-09-11 18:31:47 +03:00
|
|
|
def chunk_by_range(url, size, chunk_size=10485760):
    """Split a download of ``size`` bytes into ``&range=start-end`` URLs.

    Args:
        url: Base URL; each result is ``url`` plus ``&range=<start>-<end>``.
        size: Expected total size in bytes.
        chunk_size: Number of bytes covered by each range (default 10 MiB).

    Returns:
        A list of URLs whose ranges are consecutive and each ``chunk_size``
        bytes wide. At least one URL is always returned, and the last range
        is not clipped to ``size``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')
    # max(size, 1) keeps the first chunk even when size is 0 or negative.
    return ['%s&range=%s-%s' % (url, start, start + chunk_size - 1)
            for start in range(0, max(size, 1), chunk_size)]
|
|
|
|
|
2014-07-17 11:01:44 +04:00
|
|
|
def get_url_from_vid(vid):
    """Return the short ``youtu.be`` URL for the video ID ``vid``."""
    short_url_template = 'https://youtu.be/{}'
    return short_url_template.format(vid)
|
2014-07-17 11:01:44 +04:00
|
|
|
|
|
|
|
def get_vid_from_url(url):
    """Extracts video ID from URL.

    The path-based URL shapes are tried first, then the ``v`` query
    parameter, and finally the ``v`` parameter of a URL nested in ``u``.
    """
    path_patterns = (
        r'youtu\.be/([^?/]+)',
        r'youtube\.com/embed/([^/?]+)',
        r'youtube\.com/v/([^/?]+)',
        r'youtube\.com/watch/([^/?]+)',
    )
    for pattern in path_patterns:
        vid = match1(url, pattern)
        if vid:
            return vid

    vid = parse_query_param(url, 'v')
    if vid:
        return vid

    nested_url = parse_query_param(url, 'u')
    return parse_query_param(nested_url, 'v')
|
|
|
|
|
|
|
|
def get_playlist_id_from_url(url):
    """Extracts playlist ID from URL.

    Reads the ``list`` query parameter and falls back to ``p``.
    """
    playlist_id = parse_query_param(url, 'list')
    if not playlist_id:
        playlist_id = parse_query_param(url, 'p')
    return playlist_id
|
2014-02-06 08:03:51 +04:00
|
|
|
|
2014-07-17 11:01:44 +04:00
|
|
|
def download_playlist_by_url(self, url, **kwargs):
    """Download every indexed video of the playlist that ``url`` points to.

    The playlist page is fetched, then its "load more" continuation pages
    are followed until none is left. Each video is downloaded with a fresh
    extractor instance; a video that fails is reported and skipped.

    Args:
        url: A URL carrying a playlist ID (``list`` or ``p`` parameter).
        **kwargs: Passed through to ``download_by_url`` for every video.
    """
    # HTMLParser().unescape() was removed in Python 3.9; html.unescape is
    # the documented replacement.
    from html import unescape

    def playlist_position(link):
        """Sort key: numeric playlist index first, link text as tie-breaker."""
        index = parse_query_param(link, 'index')
        if index.isdigit():
            return (0, int(index), link)
        return (1, 0, link)

    def indexed_links(html_text, pattern):
        """Unique watch links in ``html_text`` that carry an index, in order."""
        # Unescape before looking at the query string: the raw href still
        # contains "&amp;", which hides the "index" parameter.
        links = {unescape(href) for href in re.findall(pattern, html_text)}
        return sorted((link for link in links if parse_query_param(link, 'index')),
                      key=playlist_position)

    self.url = url

    playlist_id = self.__class__.get_playlist_id_from_url(self.url)
    if playlist_id is None:
        log.wtf('[Failed] Unsupported URL pattern.')

    video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id)
    videos = indexed_links(video_page, r'<a href="(/watch\?[^"]+)"')

    # Parse browse_ajax page for more videos to load
    load_more_href = match1(video_page, r'data-uix-load-more-href="([^"]+)"')
    while load_more_href:
        browse_ajax = get_content('https://www.youtube.com/%s' % load_more_href)
        browse_data = json.loads(browse_ajax)
        load_more_widget_html = browse_data['load_more_widget_html']
        content_html = browse_data['content_html']
        videos += indexed_links(content_html, r'href="(/watch\?[^"]+)"')
        load_more_href = match1(load_more_widget_html, r'data-uix-load-more-href="([^"]+)"')

    self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
    self.p_playlist()
    for video in videos:
        vid = parse_query_param(video, 'v')
        index = parse_query_param(video, 'index')
        try:
            self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
        except KeyboardInterrupt:
            # Let the user abort the whole playlist.
            raise
        except (Exception, SystemExit) as err:
            # Best effort: one broken video must not stop the playlist.
            log.w('[Skipped] Video %s (index %s): %r' % (vid, index, err))
|
2014-07-17 11:01:44 +04:00
|
|
|
|
|
|
|
def prepare(self, **kwargs):
    """Collect title, player script and stream information for the video.

    Reads ``self.url`` / ``self.vid`` and fills ``self.title``,
    ``self.html5player``, ``self.streams``, ``self.caption_tracks`` and
    ``self.dash_streams`` (plus ``self.js`` when the player script has to be
    fetched for the DASH fallback).

    Side paths visible in this method:
      * a URL without a video ID is handed to ``download_playlist_by_url``
        and the process exits afterwards;
      * a live stream is downloaded through ``download_url_ffmpeg`` and the
        process exits, unless ``info_only`` is set.

    Args:
        **kwargs: ``playlist`` silences the playlist hint and ``info_only``
            suppresses the live-stream download; everything is forwarded to
            ``download_playlist_by_url`` when that path is taken.

    Raises:
        RuntimeError: When ``get_video_info`` reports a failure (after the
            message has been logged with ``log.wtf``).
    """
    assert self.url or self.vid

    if not self.vid and self.url:
        self.vid = self.__class__.get_vid_from_url(self.url)

        if self.vid is None:
            self.download_playlist_by_url(self.url, **kwargs)
            exit(0)

    # self.url can be unset when the extractor was started from a video ID.
    if self.url and re.search(r'\Wlist=', self.url) and not kwargs.get('playlist'):
        log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)')

    site_root = 'https://www.youtube.com'
    watch_url = 'https://www.youtube.com/watch?v=%s' % self.vid
    config_pattern = r'ytplayer.config\s*=\s*([^\n]+?});'

    def fail(message):
        """Log ``message`` without exiting, then abort with RuntimeError."""
        log.wtf(message, exit_code=None)
        raise RuntimeError(message)

    def srt_time(seconds):
        """Format a time offset in seconds as an SRT timestamp."""
        m, s = divmod(seconds, 60)
        h, m = divmod(m, 60)
        return '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')

    def parse_adaptive_fmts(raw):
        """Turn a comma-separated list of query strings into dicts."""
        return [dict((pair.split('=')[0], parse.unquote(pair.split('=')[1]))
                     for pair in afmt.split('&'))
                for afmt in raw.split(',')]

    # Get video info
    # 'eurl' is a magic parameter that can bypass age restriction
    # full form: 'eurl=https%3A%2F%2Fyoutube.googleapis.com%2Fv%2F{VIDEO_ID}'
    video_info = parse.parse_qs(get_content('https://www.youtube.com/get_video_info?video_id={}&eurl=https%3A%2F%2Fy'.format(self.vid)))

    ytplayer_config = None
    if 'status' not in video_info:
        fail('[Failed] Unknown status.')
    # Only log the status once we know it is there.
    logging.debug('STATUS: %s' % video_info['status'][0])

    if video_info['status'] == ['ok']:
        if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']:
            self.title = parse.unquote_plus(json.loads(video_info["player_response"][0])["videoDetails"]["title"])
            # Parse video page (for DASH)
            video_page = get_content(watch_url)
            try:
                ytplayer_config = json.loads(re.search(config_pattern, video_page).group(1))
                self.html5player = site_root + ytplayer_config['assets']['js']
                # Workaround: get_video_info returns bad s. Why?
                stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
            except Exception:
                # No usable player config on the page: fall back to
                # get_video_info and look for the player script directly.
                stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
                base_js = re.search(r'([^"]*/base\.js)"', video_page)
                if base_js:
                    self.html5player = site_root + base_js.group(1)
                else:
                    self.html5player = None

        else:
            # Parse video page instead
            video_page = get_content(watch_url)
            ytplayer_config = json.loads(re.search(config_pattern, video_page).group(1))

            self.title = json.loads(ytplayer_config["args"]["player_response"])["videoDetails"]["title"]
            self.html5player = site_root + ytplayer_config['assets']['js']
            stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')

    elif video_info['status'] == ['fail']:
        logging.debug('ERRORCODE: %s' % video_info['errorcode'][0])
        if video_info['errorcode'] == ['150']:
            # FIXME: still relevant?
            if cookies:
                # Load necessary cookies into headers (for age-restricted videos)
                consent, ssid, hsid, sid = 'YES', '', '', ''
                for cookie in cookies:
                    if cookie.domain.endswith('.youtube.com'):
                        if cookie.name == 'SSID':
                            ssid = cookie.value
                        elif cookie.name == 'HSID':
                            hsid = cookie.value
                        elif cookie.name == 'SID':
                            sid = cookie.value
                cookie_str = 'CONSENT=%s; SSID=%s; HSID=%s; SID=%s' % (consent, ssid, hsid, sid)

                video_page = get_content(watch_url, headers={'Cookie': cookie_str})
            else:
                video_page = get_content(watch_url)

            try:
                ytplayer_config = json.loads(re.search(r'ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1))
            except Exception:
                msg = re.search('class="message">([^<]+)<', video_page).group(1)
                log.wtf('[Failed] Got message "%s". Try to login with --cookies.' % msg.strip())

            if 'title' in ytplayer_config['args']:
                # 150 Restricted from playback on certain sites
                # Parse video page instead
                self.title = ytplayer_config['args']['title']
                self.html5player = site_root + ytplayer_config['assets']['js']
                stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
            else:
                fail('[Error] The uploader has not made this video available in your country.')

        elif video_info['errorcode'] == ['100']:
            fail('[Failed] This video does not exist.')

        else:
            fail('[Failed] %s' % video_info['reason'][0])

    else:
        fail('[Failed] Invalid status.')

    # YouTube Live
    if ytplayer_config and (ytplayer_config['args'].get('livestream') == '1' or ytplayer_config['args'].get('live_playback') == '1'):
        if 'hlsvp' in ytplayer_config['args']:
            hlsvp = ytplayer_config['args']['hlsvp']
        else:
            player_response = json.loads(ytplayer_config['args']['player_response'])
            log.e('[Failed] %s' % player_response['playabilityStatus']['reason'], exit_code=1)

        if 'info_only' in kwargs and kwargs['info_only']:
            return
        else:
            download_url_ffmpeg(hlsvp, self.title, 'mp4')
            exit(0)

    for stream in stream_list:
        metadata = parse.parse_qs(stream)
        stream_itag = metadata['itag'][0]
        stream_type = metadata['type'][0]
        mime = stream_type.split(';')[0]
        self.streams[stream_itag] = {
            'itag': stream_itag,
            'url': metadata['url'][0],
            'sig': metadata['sig'][0] if 'sig' in metadata else None,
            's': metadata['s'][0] if 's' in metadata else None,
            'quality': metadata['quality'][0] if 'quality' in metadata else None,
            'type': stream_type,
            'mime': mime,
            'container': mime_to_container(mime),
        }

    # Prepare caption tracks
    try:
        caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks']
        for ct in caption_tracks:
            ttsurl, lang = ct['baseUrl'], ct['languageCode']

            tts_xml = parseString(get_content(ttsurl))
            transcript = tts_xml.getElementsByTagName('transcript')[0]
            texts = transcript.getElementsByTagName('text')
            cues = []
            seq = 0
            for text in texts:
                if text.firstChild is None:
                    continue  # empty element
                seq += 1
                start = float(text.getAttribute('start'))
                if text.getAttribute('dur'):
                    dur = float(text.getAttribute('dur'))
                else:
                    dur = 1.0  # could be ill-formed XML
                content = unescape_html(text.firstChild.nodeValue)

                cues.append('%s\n' % str(seq))
                cues.append('%s --> %s\n' % (srt_time(start), srt_time(start + dur)))
                cues.append('%s\n\n' % content)

            self.caption_tracks[lang] = ''.join(cues)
    except Exception:
        # Captions are optional; any problem simply leaves them out.
        pass

    # Prepare DASH streams
    try:
        dashmpd = ytplayer_config['args']['dashmpd']
        dash_xml = parseString(get_content(dashmpd))
        for aset in dash_xml.getElementsByTagName('AdaptationSet'):
            mimeType = aset.getAttribute('mimeType')
            if mimeType == 'audio/mp4':
                rep = aset.getElementsByTagName('Representation')[-1]
                burls = rep.getElementsByTagName('BaseURL')
                dash_mp4_a_url = burls[0].firstChild.nodeValue
                dash_mp4_a_size = burls[0].getAttribute('yt:contentLength')
                if not dash_mp4_a_size:
                    try:
                        dash_mp4_a_size = url_size(dash_mp4_a_url)
                    except Exception:
                        continue
            elif mimeType == 'audio/webm':
                rep = aset.getElementsByTagName('Representation')[-1]
                burls = rep.getElementsByTagName('BaseURL')
                dash_webm_a_url = burls[0].firstChild.nodeValue
                dash_webm_a_size = burls[0].getAttribute('yt:contentLength')
                if not dash_webm_a_size:
                    try:
                        dash_webm_a_size = url_size(dash_webm_a_url)
                    except Exception:
                        continue
            elif mimeType == 'video/mp4':
                for rep in aset.getElementsByTagName('Representation'):
                    w = int(rep.getAttribute('width'))
                    h = int(rep.getAttribute('height'))
                    itag = rep.getAttribute('id')
                    burls = rep.getElementsByTagName('BaseURL')
                    dash_url = burls[0].firstChild.nodeValue
                    dash_size = burls[0].getAttribute('yt:contentLength')
                    if not dash_size:
                        try:
                            dash_size = url_size(dash_url)
                        except Exception:
                            continue
                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
                    dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
                    self.dash_streams[itag] = {
                        'quality': '%sx%s' % (w, h),
                        'itag': itag,
                        'type': mimeType,
                        'mime': mimeType,
                        'container': 'mp4',
                        'src': [dash_urls, dash_mp4_a_urls],
                        'size': int(dash_size) + int(dash_mp4_a_size)
                    }
            elif mimeType == 'video/webm':
                for rep in aset.getElementsByTagName('Representation'):
                    w = int(rep.getAttribute('width'))
                    h = int(rep.getAttribute('height'))
                    itag = rep.getAttribute('id')
                    burls = rep.getElementsByTagName('BaseURL')
                    dash_url = burls[0].firstChild.nodeValue
                    dash_size = burls[0].getAttribute('yt:contentLength')
                    if not dash_size:
                        try:
                            dash_size = url_size(dash_url)
                        except Exception:
                            continue
                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
                    dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size))
                    self.dash_streams[itag] = {
                        'quality': '%sx%s' % (w, h),
                        'itag': itag,
                        'type': mimeType,
                        'mime': mimeType,
                        'container': 'webm',
                        'src': [dash_urls, dash_webm_a_urls],
                        'size': int(dash_size) + int(dash_webm_a_size)
                    }
    except Exception:
        # No usable DASH manifest: build the DASH streams from the
        # 'adaptive_fmts' description instead.
        # VEVO
        if not self.html5player:
            return
        self.js = get_content(self.html5player)

        try:
            # Video info from video page (not always available)
            streams = parse_adaptive_fmts(ytplayer_config['args']['adaptive_fmts'])
        except Exception:
            streams = parse_adaptive_fmts(video_info['adaptive_fmts'][0])

        for stream in streams:  # get over speed limiting
            stream['url'] += '&ratebypass=yes'
        for stream in streams:  # audio
            if stream['type'].startswith('audio/mp4'):
                dash_mp4_a_url = stream['url']
                if 's' in stream:
                    sig = self.__class__.decipher(self.js, stream['s'])
                    dash_mp4_a_url += '&sig={}'.format(sig)
                dash_mp4_a_size = stream['clen']
            elif stream['type'].startswith('audio/webm'):
                dash_webm_a_url = stream['url']
                if 's' in stream:
                    sig = self.__class__.decipher(self.js, stream['s'])
                    dash_webm_a_url += '&sig={}'.format(sig)
                dash_webm_a_size = stream['clen']
        for stream in streams:  # video
            if 'size' in stream:
                if stream['type'].startswith('video/mp4'):
                    mimeType = 'video/mp4'
                    dash_url = stream['url']
                    if 's' in stream:
                        sig = self.__class__.decipher(self.js, stream['s'])
                        dash_url += '&sig={}'.format(sig)
                    dash_size = stream['clen']
                    itag = stream['itag']
                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
                    dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
                    self.dash_streams[itag] = {
                        'quality': '%s (%s)' % (stream['size'], stream['quality_label']),
                        'itag': itag,
                        'type': mimeType,
                        'mime': mimeType,
                        'container': 'mp4',
                        'src': [dash_urls, dash_mp4_a_urls],
                        'size': int(dash_size) + int(dash_mp4_a_size)
                    }
                elif stream['type'].startswith('video/webm'):
                    mimeType = 'video/webm'
                    dash_url = stream['url']
                    if 's' in stream:
                        sig = self.__class__.decipher(self.js, stream['s'])
                        dash_url += '&sig={}'.format(sig)
                    dash_size = stream['clen']
                    itag = stream['itag']
                    audio_url = None
                    audio_size = None
                    try:
                        audio_url = dash_webm_a_url
                        audio_size = int(dash_webm_a_size)
                    except UnboundLocalError:
                        # No WebM audio was listed: pair with the MP4 audio.
                        audio_url = dash_mp4_a_url
                        audio_size = int(dash_mp4_a_size)
                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
                    audio_urls = self.__class__.chunk_by_range(audio_url, int(audio_size))
                    self.dash_streams[itag] = {
                        'quality': '%s (%s)' % (stream['size'], stream['quality_label']),
                        'itag': itag,
                        'type': mimeType,
                        'mime': mimeType,
                        'container': 'webm',
                        'src': [dash_urls, audio_urls],
                        'size': int(dash_size) + int(audio_size)
                    }
|
2015-10-17 23:03:18 +03:00
|
|
|
|
2014-07-17 11:01:44 +04:00
|
|
|
def extract(self, **kwargs):
    """Resolve the final source URL and size of the selected stream.

    The stream is the one named by ``stream_id`` in ``kwargs`` or, without
    it, the first entry of ``self.streams_sorted``. For a stream found in
    ``self.streams`` the signature (given, or deciphered with the player
    script) is appended to its URL and the result is stored under ``'src'``
    together with ``'size'``. Streams that are only in ``self.dash_streams``
    are left untouched.
    """
    if not self.streams_sorted:
        # No stream is available
        return

    requested = kwargs.get('stream_id')
    if requested:
        # Extract the stream
        stream_id = requested
        known = stream_id in self.streams or stream_id in self.dash_streams
        if not known:
            log.e('[Error] Invalid video format.')
            log.e('Run \'-i\' command with no specific video format to view all available formats.')
            exit(2)
    else:
        # Extract stream with the best quality
        stream_id = self.streams_sorted[0]['itag']

    if stream_id not in self.streams:
        return

    chosen = self.streams[stream_id]
    src = chosen['url']
    if chosen['sig'] is not None:
        src += '&sig={}'.format(chosen['sig'])
    elif chosen['s'] is not None:
        # The player script is fetched only once per instance.
        if not hasattr(self, 'js'):
            self.js = get_content(self.html5player)
        src += '&sig={}'.format(self.__class__.decipher(self.js, chosen['s']))

    chosen['src'] = [src]
    chosen['size'] = urls_size(chosen['src'])
|
2012-08-20 19:54:03 +04:00
|
|
|
|
2014-07-17 11:01:44 +04:00
|
|
|
# Module-level entry points: one shared extractor instance and its bound
# download methods exposed as plain callables.
site = YouTube()
download = site.download_by_url
download_playlist = site.download_playlist_by_url
|