#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor

from xml.dom.minidom import parseString

class YouTube(VideoExtractor):
    """Extractor for YouTube videos and playlists.

    The helpers ``decipher``, ``get_url_from_vid``, ``get_vid_from_url`` and
    ``get_playlist_id_from_url`` take no ``self``; elsewhere in this class
    they are invoked through the class object
    (``self.__class__.<helper>(...)``).

    NOTE(review): this copy of the file is corrupted between the middle of
    ``download_playlist_by_url`` and the middle of what appears to be a
    ``prepare``-style method -- text resembling markup was stripped. The
    surviving fragments are reproduced verbatim below and marked; restore
    the missing code from version control before relying on this file.
    """

    name = "YouTube"

    # YouTube media encoding options, in descending quality order.
    # http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs. Retrieved July 17, 2014.
    # Entries that are commented out all carry the '3D' video profile.
    stream_types = [
        {'itag': '38', 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
        #{'itag': '85', 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '3-4', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
        {'itag': '46', 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
        {'itag': '37', 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
        #{'itag': '102', 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
        {'itag': '45', 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
        #{'itag': '84', 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
        {'itag': '22', 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
        {'itag': '120', 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, # Live streaming only
        {'itag': '44', 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
        {'itag': '35', 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
        #{'itag': '101', 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
        #{'itag': '100', 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
        {'itag': '43', 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
        {'itag': '34', 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
        #{'itag': '82', 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
        {'itag': '18', 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
        {'itag': '6', 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
        #{'itag': '83', 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
        {'itag': '13', 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''},
        {'itag': '5', 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
        {'itag': '36', 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.175', 'audio_encoding': 'AAC', 'audio_bitrate': '36'},
        {'itag': '17', 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
    ]

    def decipher(js, s):
        """Return the signature computed from ``s`` by the routine found in ``js``.

        The signature function and the helper functions it calls are located
        in the JavaScript text ``js`` with regular expressions, rewritten into
        Python source by ``tr_js``, executed, and the resulting ``sig`` value
        is returned.

        js -- text of the player JavaScript.
        s  -- the value passed to the translated signature function.

        NOTE(review): this ``exec``s code derived from ``js``; callers in this
        file obtain ``js`` via ``get_content(self.html5player)``, so it is
        presumably remote, untrusted input -- flagged, deliberately not
        changed here.
        """
        def tr_js(code):
            """Rewrite a small JavaScript function into Python source via regex substitutions."""
            code = re.sub(r'function', r'def', code)
            # '$' is not valid in a Python identifier.
            code = re.sub(r'\$', '_dollar', code)
            # Braces become a colon plus a tab-indented body, then a line break.
            code = re.sub(r'\{', r':\n\t', code)
            code = re.sub(r'\}', r'\n', code)
            code = re.sub(r'var\s+', r'', code)
            # x.join("") -> "".join(x)
            code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
            # x.length -> len(x)
            code = re.sub(r'(\w+).length', r'len(\1)', code)
            # x.slice(n) -> x[n:]
            code = re.sub(r'(\w+).slice\((\w+)\)', r'\1[\2:]', code)
            # x.splice(a,b) -> del x[a:a+b]
            code = re.sub(r'(\w+).splice\((\w+),(\w+)\)', r'del \1[\2:\2+\3]', code)
            # x.split("") -> list(x)
            code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
            return code

        # Name of the signature function, taken from a '<x>.sig||<name>(<a>.<b>)' expression.
        # presumably match1 returns the first capture group, or None when nothing matches -- TODO confirm
        f1 = match1(js, r'\w+\.sig\|\|([$\w]+)\(\w+\.\w+\)')
        f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % re.escape(f1))
        # Drop the 'obj.' prefix from helper calls of the form obj.helper(a,N).
        f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
        code = tr_js(f1def)
        # Distinct helper names called as helper(a,N) inside the signature function.
        f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def))
        for f2 in f2s:
            f2e = re.escape(f2)
            # Prefer the two-parameter form 'name:function(a,b){...}'.
            f2def = re.search(r'[^$\w]%s:function\((\w+,\w+)\)(\{[^\{\}]+\})' % f2e, js)
            if f2def:
                f2def = 'function {}({}){}'.format(f2e, f2def.group(1), f2def.group(2))
            else:
                # One-parameter helper: a second parameter 'b' is added so that
                # the helper(a,N) call form still works.
                f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js)
                f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2))
            f2 = re.sub(r'\$', '_dollar', f2)
            # 'global' makes the helper definition land in the globals dict
            # handed to exec below.
            code = code + 'global %s\n' % f2 + tr_js(f2def)

        code = code + 'sig=%s(s)' % re.sub(r'\$', '_dollar', f1)
        # NOTE(review): relies on exec() storing 'sig' in the dict returned by
        # locals() and on the following locals() call still exposing it.
        # PEP 667 (Python 3.13) makes function-scope locals() return an
        # independent snapshot per call -- confirm on the target interpreter.
        exec(code, globals(), locals())
        return locals()['sig']

    def get_url_from_vid(vid):
        """Return the short ``https://youtu.be/<vid>`` URL for a video ID."""
        return 'https://youtu.be/{}'.format(vid)

    def get_vid_from_url(url):
        """Extracts video ID from URL.

        Tried in order, returning the first truthy result:
        ``youtu.be/<id>``, ``youtube.com/embed/<id>``, ``youtube.com/v/<id>``,
        the ``v`` query parameter of ``url``, and finally the ``v`` query
        parameter of the value carried in the ``u`` query parameter of ``url``.
        """
        # presumably parse_query_param returns None for a missing parameter
        # (and tolerates a None url in the last step) -- TODO confirm
        return match1(url, r'youtu\.be/([^/]+)') or \
          match1(url, r'youtube\.com/embed/([^/?]+)') or \
          match1(url, r'youtube\.com/v/([^/?]+)') or \
          parse_query_param(url, 'v') or \
          parse_query_param(parse_query_param(url, 'u'), 'v')

    def get_playlist_id_from_url(url):
        """Extracts playlist ID from URL.

        Uses the ``list`` query parameter, falling back to ``p``.
        """
        return parse_query_param(url, 'list') or \
          parse_query_param(url, 'p')

    def download_playlist_by_url(self, url, **kwargs):
        """Download the videos of the playlist referenced by ``url``.

        Stores ``url`` on the instance, derives the playlist ID and fetches
        the playlist page.

        NOTE(review): the remainder of this method is missing from this copy
        of the file (see the marker below), so nothing beyond the page fetch
        can be documented.
        """
        self.url = url

        playlist_id = self.__class__.get_playlist_id_from_url(self.url)
        if playlist_id is None:
            # presumably log.wtf aborts; otherwise None is formatted into the URL below -- TODO confirm
            log.wtf('[Failed] Unsupported URL pattern.')

        video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id)
        # NOTE(review): HTMLParser.unescape() is no longer available on newer
        # Python 3 releases (html.unescape is the replacement) -- confirm the
        # supported interpreter range.
        from html.parser import HTMLParser
        videos = sorted([HTMLParser().unescape(video) for video in re.findall(r'([^<]+)<', video_page).group(1)
        # NOTE(review): CORRUPTION STARTS IN THE LINE ABOVE. The list
        # comprehension is never closed, and re.findall() returns a list,
        # which has no .group(); the regex tail and '.group(1)' presumably
        # belong to a different, later call. Everything between the two --
        # the rest of this method plus the header and beginning of the next
        # method -- is missing. The fragments that follow are reproduced
        # verbatim; their indentation is a best guess.
                    log.wtf('[Failed] "%s"' % msg.strip())

                if 'title' in ytplayer_config['args']:
                    # 150 Restricted from playback on certain sites
                    # Parse video page instead
                    self.title = ytplayer_config['args']['title']
                    self.html5player = 'https:' + ytplayer_config['assets']['js']
                    stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
                else:
                    log.wtf('[Error] The uploader has not made this video available in your country.')
                    #self.title = re.search('
        # NOTE(review): SECOND GAP. The commented-out line above is cut off
        # inside its string literal and more code is missing after it. The
        # bare '1280:' below is residue of a stripped condition -- presumably
        # 'if w > 1280:', matching the intact 'video/webm' branch further
        # down; confirm against version control.
                        1280:
                            itag = rep.getAttribute('id')
                            burls = rep.getElementsByTagName('BaseURL')
                            dash_url = burls[0].firstChild.nodeValue
                            dash_size = burls[0].getAttribute('yt:contentLength')
                            # Video and audio are kept as two sources; 'size' is their sum.
                            self.dash_streams[itag] = {
                                'quality': '%sx%s' % (w, h),
                                'itag': itag,
                                'type': mimeType,
                                'mime': mimeType,
                                'container': 'mp4',
                                'src': [dash_url, dash_mp4_a_url],
                                'size': int(dash_size) + int(dash_mp4_a_size)
                            }
                elif mimeType == 'video/webm':
                    for rep in aset.getElementsByTagName('Representation'):
                        w = int(rep.getAttribute('width'))
                        h = int(rep.getAttribute('height'))
                        # Only representations wider than 1280 are registered.
                        if w > 1280:
                            itag = rep.getAttribute('id')
                            burls = rep.getElementsByTagName('BaseURL')
                            dash_url = burls[0].firstChild.nodeValue
                            dash_size = burls[0].getAttribute('yt:contentLength')
                            self.dash_streams[itag] = {
                                'quality': '%sx%s' % (w, h),
                                'itag': itag,
                                'type': mimeType,
                                'mime': mimeType,
                                'container': 'webm',
                                'src': [dash_url, dash_webm_a_url],
                                'size': int(dash_size) + int(dash_webm_a_size)
                            }
        # NOTE(review): the 'try:' matching this handler lies in the missing
        # region. The bare 'except:' catches every exception, so any failure
        # in the guarded code silently diverts to the path below.
        except:
            # VEVO
            self.js = get_content(self.html5player)
            if 'adaptive_fmts' in ytplayer_config['args']:
                # Each comma-separated entry is an '&'-joined list of key=value
                # pairs; values are URL-unquoted.
                streams = [dict([(i.split('=')[0], parse.unquote(i.split('=')[1])) for i in afmt.split('&')]) for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
                # First pass: remember the audio track URL and size per container.
                for stream in streams: # audio
                    if stream['type'].startswith('audio/mp4'):
                        dash_mp4_a_url = stream['url']
                        if 's' in stream:
                            sig = self.__class__.decipher(self.js, stream['s'])
                            dash_mp4_a_url += '&signature={}'.format(sig)
                        dash_mp4_a_size = stream['clen']
                    elif stream['type'].startswith('audio/webm'):
                        dash_webm_a_url = stream['url']
                        if 's' in stream:
                            sig = self.__class__.decipher(self.js, stream['s'])
                            dash_webm_a_url += '&signature={}'.format(sig)
                        dash_webm_a_size = stream['clen']
                # Second pass: pair each video wider than 1280 with the audio
                # track found above.
                # NOTE(review): unlike the audio pass, stream['s'] is read here
                # without an "'s' in stream" check.
                for stream in streams: # video
                    if 'size' in stream:
                        w = int(r1(r'(\d+)x\d+', stream['size']))
                        if w > 1280 and stream['type'].startswith('video/mp4'):
                            mimeType = 'video/mp4'
                            dash_url = stream['url']
                            sig = self.__class__.decipher(self.js, stream['s'])
                            dash_url += '&signature={}'.format(sig)
                            dash_size = stream['clen']
                            itag = stream['itag']
                            self.dash_streams[itag] = {
                                'quality': stream['size'],
                                'itag': itag,
                                'type': mimeType,
                                'mime': mimeType,
                                'container': 'mp4',
                                'src': [dash_url, dash_mp4_a_url],
                                'size': int(dash_size) + int(dash_mp4_a_size)
                            }
                        elif w > 1280 and stream['type'].startswith('video/webm'):
                            mimeType = 'video/webm'
                            dash_url = stream['url']
                            sig = self.__class__.decipher(self.js, stream['s'])
                            dash_url += '&signature={}'.format(sig)
                            dash_size = stream['clen']
                            itag = stream['itag']
                            self.dash_streams[itag] = {
                                'quality': stream['size'],
                                'itag': itag,
                                'type': mimeType,
                                'mime': mimeType,
                                'container': 'webm',
                                'src': [dash_url, dash_webm_a_url],
                                'size': int(dash_size) + int(dash_webm_a_size)
                            }

    def extract(self, **kwargs):
        """Fill in ``src`` and ``size`` for the selected entry of ``self.streams``.

        The stream is ``kwargs['stream_id']`` when given and truthy, otherwise
        the ``itag`` of the first entry in ``self.streams_sorted``. A requested
        ID found in neither ``self.streams`` nor ``self.dash_streams`` logs an
        error and calls ``exit(2)``. Entries that exist only in
        ``self.dash_streams`` are left untouched.
        """
        if not self.streams_sorted:
            # No stream is available
            return

        if 'stream_id' in kwargs and kwargs['stream_id']:
            # Extract the stream
            stream_id = kwargs['stream_id']
            if stream_id not in self.streams and stream_id not in self.dash_streams:
                log.e('[Error] Invalid video format.')
                log.e('Run \'-i\' command with no specific video format to view all available formats.')
                exit(2)
        else:
            # Extract stream with the best quality
            stream_id = self.streams_sorted[0]['itag']

        if stream_id in self.streams:
            src = self.streams[stream_id]['url']
            if self.streams[stream_id]['sig'] is not None:
                # A ready-made signature is appended as-is.
                sig = self.streams[stream_id]['sig']
                src += '&signature={}'.format(sig)
            elif self.streams[stream_id]['s'] is not None:
                # Otherwise the signature is derived from 's'; the player
                # script is fetched only if not already stored on the instance.
                if not hasattr(self, 'js'):
                    self.js = get_content(self.html5player)
                s = self.streams[stream_id]['s']
                sig = self.__class__.decipher(self.js, s)
                src += '&signature={}'.format(sig)

            self.streams[stream_id]['src'] = [src]
            self.streams[stream_id]['size'] = urls_size(self.streams[stream_id]['src'])

# Module-level entry points bound to a single shared extractor instance.
site = YouTube()
download = site.download_by_url
download_playlist = site.download_playlist_by_url