2012-08-31 19:20:38 +04:00
|
|
|
#!/usr/bin/env python
|
2012-08-20 19:54:03 +04:00
|
|
|
|
|
|
|
# Names exported when this module is star-imported (``from ... import *``).
__all__ = ['youtube_download', 'youtube_download_by_id']
|
|
|
|
|
2012-08-31 19:20:38 +04:00
|
|
|
from ..common import *
|
2012-08-20 19:54:03 +04:00
|
|
|
|
2013-03-22 07:24:01 +04:00
|
|
|
# YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 2/14/2014.
#
# Each entry describes one "itag" (stream format id).  The list order is the
# preference order: youtube_download_by_id() walks it from the top and picks
# the first itag the video actually offers.  Entries whose video profile is
# '3D' are kept commented out, so they are never selected.
yt_codecs = [
    dict(itag=38, container='MP4', video_resolution='3072p', video_encoding='H.264',
         video_profile='High', video_bitrate='3.5-5', audio_encoding='AAC', audio_bitrate='192'),
    # dict(itag=85, container='MP4', video_resolution='1080p', video_encoding='H.264',
    #      video_profile='3D', video_bitrate='3-4', audio_encoding='AAC', audio_bitrate='192'),
    dict(itag=46, container='WebM', video_resolution='1080p', video_encoding='VP8',
         video_profile='', video_bitrate='', audio_encoding='Vorbis', audio_bitrate='192'),
    dict(itag=37, container='MP4', video_resolution='1080p', video_encoding='H.264',
         video_profile='High', video_bitrate='3-4.3', audio_encoding='AAC', audio_bitrate='192'),
    # dict(itag=102, container='WebM', video_resolution='720p', video_encoding='VP8',
    #      video_profile='3D', video_bitrate='', audio_encoding='Vorbis', audio_bitrate='192'),
    dict(itag=45, container='WebM', video_resolution='720p', video_encoding='VP8',
         video_profile='', video_bitrate='2', audio_encoding='Vorbis', audio_bitrate='192'),
    # dict(itag=84, container='MP4', video_resolution='720p', video_encoding='H.264',
    #      video_profile='3D', video_bitrate='2-3', audio_encoding='AAC', audio_bitrate='192'),
    dict(itag=22, container='MP4', video_resolution='720p', video_encoding='H.264',
         video_profile='High', video_bitrate='2-3', audio_encoding='AAC', audio_bitrate='192'),
    dict(itag=120, container='FLV', video_resolution='720p', video_encoding='H.264',
         video_profile='Main@L3.1', video_bitrate='2', audio_encoding='AAC', audio_bitrate='128'),
    dict(itag=44, container='WebM', video_resolution='480p', video_encoding='VP8',
         video_profile='', video_bitrate='1', audio_encoding='Vorbis', audio_bitrate='128'),
    dict(itag=35, container='FLV', video_resolution='480p', video_encoding='H.264',
         video_profile='Main', video_bitrate='0.8-1', audio_encoding='AAC', audio_bitrate='128'),
    # dict(itag=101, container='WebM', video_resolution='360p', video_encoding='VP8',
    #      video_profile='3D', video_bitrate='', audio_encoding='Vorbis', audio_bitrate='192'),
    # dict(itag=100, container='WebM', video_resolution='360p', video_encoding='VP8',
    #      video_profile='3D', video_bitrate='', audio_encoding='Vorbis', audio_bitrate='128'),
    dict(itag=43, container='WebM', video_resolution='360p', video_encoding='VP8',
         video_profile='', video_bitrate='0.5', audio_encoding='Vorbis', audio_bitrate='128'),
    dict(itag=34, container='FLV', video_resolution='360p', video_encoding='H.264',
         video_profile='Main', video_bitrate='0.5', audio_encoding='AAC', audio_bitrate='128'),
    # dict(itag=82, container='MP4', video_resolution='360p', video_encoding='H.264',
    #      video_profile='3D', video_bitrate='0.5', audio_encoding='AAC', audio_bitrate='96'),
    dict(itag=18, container='MP4', video_resolution='270p/360p', video_encoding='H.264',
         video_profile='Baseline', video_bitrate='0.5', audio_encoding='AAC', audio_bitrate='96'),
    dict(itag=6, container='FLV', video_resolution='270p', video_encoding='Sorenson H.263',
         video_profile='', video_bitrate='0.8', audio_encoding='MP3', audio_bitrate='64'),
    # dict(itag=83, container='MP4', video_resolution='240p', video_encoding='H.264',
    #      video_profile='3D', video_bitrate='0.5', audio_encoding='AAC', audio_bitrate='96'),
    dict(itag=13, container='3GP', video_resolution='', video_encoding='MPEG-4 Visual',
         video_profile='', video_bitrate='0.5', audio_encoding='AAC', audio_bitrate=''),
    dict(itag=5, container='FLV', video_resolution='240p', video_encoding='Sorenson H.263',
         video_profile='', video_bitrate='0.25', audio_encoding='MP3', audio_bitrate='64'),
    dict(itag=36, container='3GP', video_resolution='240p', video_encoding='MPEG-4 Visual',
         video_profile='Simple', video_bitrate='0.175', audio_encoding='AAC', audio_bitrate='36'),
    dict(itag=17, container='3GP', video_resolution='144p', video_encoding='MPEG-4 Visual',
         video_profile='Simple', video_bitrate='0.05', audio_encoding='AAC', audio_bitrate='24'),
]
|
|
|
|
|
2013-08-17 11:11:03 +04:00
|
|
|
def decipher(js, s):
    """Decipher an enciphered YouTube stream signature.

    Finds the signature-scrambling function in the player JavaScript,
    transliterates it (plus the one two-argument helper it may call) into
    Python source through a fixed list of regex rewrites, runs that source
    and returns what the translated function produces for ``s``.

    Args:
        js: Source text of the player script that contains the scrambling
            function.
        s: The enciphered signature to decode.

    Returns:
        The value the translated scrambling function returns for ``s``.
    """
    # Ordered (pattern, replacement) rewrites turning the small JavaScript
    # subset used by the scrambling functions into Python.  Order matters:
    # braces are converted before the method-call rewrites run.  The dots are
    # escaped so that only real member accesses (``a.length``) are rewritten.
    rewrites = (
        (r'function', r'def'),
        (r'\$', '_dollar'),
        (r'\{', r':\n\t'),
        (r'\}', r'\n'),
        (r'var\s+', r''),
        (r'(\w+)\.join\(""\)', r'"".join(\1)'),
        (r'(\w+)\.length', r'len(\1)'),
        (r'(\w+)\.reverse\(\)', r'\1[::-1]'),
        (r'(\w+)\.slice\((\d+)\)', r'\1[\2:]'),
        (r'(\w+)\.split\(""\)', r'list(\1)'),
    )

    def tr_js(code):
        """Transliterate one JavaScript function definition into Python."""
        for pattern, replacement in rewrites:
            code = re.sub(pattern, replacement, code)
        return code

    # Name of the scrambling function, taken from the ``x.sig||NAME(y.z)``
    # call site, then its full definition.
    f1 = match1(js, r'\w+\.sig\|\|([$\w]+)\(\w+\.\w+\)')
    f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % re.escape(f1))
    code = tr_js(f1def)

    # Optional helper called as ``NAME(a, <number>)`` from inside f1.
    f2 = match1(f1def, r'([$\w]+)\(\w+,\d+\)')
    if f2 is not None:
        f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % re.escape(f2))
        code += tr_js(f2def)

    code += 'sig=%s(s)' % re.sub(r'\$', '_dollar', f1)

    # NOTE(security): this executes code derived from a remotely fetched
    # script; only the regex rewrites above stand between that script and
    # exec().
    #
    # Run everything in one private namespace.  Both translated functions are
    # defined in it, so they can see each other without a ``global``
    # statement, the module globals are left untouched, and the result is
    # read back from a dict we own instead of from ``locals()`` (whose
    # write-through behaviour is not guaranteed across Python versions).
    namespace = {'s': s}
    exec(code, namespace)
    return namespace['sig']
|
2013-06-26 20:50:25 +04:00
|
|
|
|
2013-07-11 12:48:13 +04:00
|
|
|
def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a YouTube video by its unique id.

    Stream metadata is read from the ``get_video_info`` endpoint when that
    response is usable (status ``ok`` and no ciphered signatures); otherwise
    the watch page's ``ytplayer.config`` is parsed instead.  The first format
    in ``yt_codecs`` that the video offers is selected.

    Args:
        id: The YouTube video id.
        title: Accepted for interface compatibility; it is always replaced
            by the title reported by YouTube.
        output_dir: Passed through to ``download_urls``.
        merge: Passed through to ``download_urls``.
        info_only: When true, only print the stream information and skip
            the download.

    Raises:
        ValueError: If none of the formats in ``yt_codecs`` is offered, or a
            ciphered signature is present but no player script is known.
    """
    raw_video_info = get_content('http://www.youtube.com/get_video_info?video_id=%s' % id)
    video_info = parse.parse_qs(raw_video_info)

    # URL of the player script; only known when the watch page was parsed.
    html5player = None

    if (video_info.get('status') == ['ok']
            and video_info.get('use_cipher_signature', ['False']) == ['False']):
        # parse_qs() has already percent-decoded the value; decoding it a
        # second time would corrupt titles containing '+' or '%XX'.
        title = video_info['title'][0]
        stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
    else:
        # Parse video page when video_info is not usable.
        video_page = get_content('http://www.youtube.com/watch?v=%s' % id)
        ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+});'))

        player_args = ytplayer_config['args']
        title = player_args['title']
        stream_list = player_args['url_encoded_fmt_stream_map'].split(',')

        html5player = ytplayer_config['assets']['js']
        if html5player.startswith('//'):
            # Protocol-relative URL: give it an explicit scheme.
            html5player = 'http:' + html5player

    # Index the offered streams by itag (a string), parsing each entry once.
    streams = {}
    for stream in stream_list:
        fields = parse.parse_qs(stream)
        streams[fields['itag'][0]] = fields

    # yt_codecs is ordered best-first, so the first hit is the best format.
    download_stream = None
    for codec in yt_codecs:
        download_stream = streams.get(str(codec['itag']))
        if download_stream is not None:
            break
    if download_stream is None:
        raise ValueError('no supported stream format found for video %s' % id)

    url = download_stream['url'][0]
    if 'sig' in download_stream:
        # Plain signature: append as-is.
        url = '%s&signature=%s' % (url, download_stream['sig'][0])
    elif 's' in download_stream:
        # Enciphered signature: decode it with the player script.
        if html5player is None:
            raise ValueError('ciphered signature for video %s but no player script is known' % id)
        js = get_content(html5player)
        sig = decipher(js, download_stream['s'][0])
        url = '%s&signature=%s' % (url, sig)

    mime, ext, size = url_info(url)

    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge)
|
|
|
|
|
2013-10-23 07:32:53 +04:00
|
|
|
def youtube_list_download_by_id(list_id, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a YouTube video list by its unique id.

    Fetches the playlist page, collects the video ids linked from it and
    downloads each one with ``youtube_download_by_id``.  Every id is
    downloaded once, in the order it first appears on the page.

    Args:
        list_id: The YouTube playlist id.
        title: Passed through to ``youtube_download_by_id``.
        output_dir: Passed through to ``youtube_download_by_id``.
        merge: Passed through to ``youtube_download_by_id``.
        info_only: Passed through to ``youtube_download_by_id``.
    """
    video_page = get_content('http://www.youtube.com/playlist?list=%s' % list_id)

    # De-duplicate while keeping page order; a plain set() would download the
    # playlist in arbitrary order.
    ordered_ids = []
    seen = set()
    for video_id in re.findall(r'<a href="\/watch\?v=([\w-]+)', video_page):
        if video_id not in seen:
            seen.add(video_id)
            ordered_ids.append(video_id)

    for video_id in ordered_ids:
        youtube_download_by_id(video_id, title, output_dir, merge, info_only)
|
|
|
|
|
2013-07-11 12:48:13 +04:00
|
|
|
def youtube_download(url, output_dir='.', merge=True, info_only=False):
    """Downloads YouTube videos by URL.

    The video id is looked for, in order, in a ``youtu.be/<id>`` short link,
    a ``youtube.com/embed/<id>`` link, the ``v`` query parameter, and the
    ``v`` parameter of a URL carried in the ``u`` query parameter.  When no
    video id is found, the ``list`` or ``p`` query parameter is used as a
    playlist id instead.

    Args:
        url: The YouTube URL to download from.
        output_dir: Passed through to the download function.
        merge: Passed through to the download function.
        info_only: Passed through to the download function.

    Raises:
        AssertionError: If neither a video id nor a playlist id can be
            extracted from ``url``.
    """
    # The id stops at '/', '?', '#' or '&' so that trailing parameters such
    # as ``youtu.be/<id>?t=30`` are not swallowed into the id.
    video_id = (
        match1(url, r'youtu\.be/([^/?#&]+)')
        or match1(url, r'youtube\.com/embed/([^/?#&]+)')
        or parse_query_param(url, 'v')
        or parse_query_param(parse_query_param(url, 'u'), 'v')
    )
    if video_id:
        youtube_download_by_id(video_id, title=None, output_dir=output_dir,
                               merge=merge, info_only=info_only)
        return

    list_id = parse_query_param(url, 'list') or parse_query_param(url, 'p')
    if not list_id:
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError('cannot find a video id or playlist id in %r' % (url,))

    youtube_list_download_by_id(list_id, title=None, output_dir=output_dir,
                                merge=merge, info_only=info_only)
|
2012-08-20 19:54:03 +04:00
|
|
|
|
|
|
|
# Site label; youtube_download_by_id() passes it to print_info().
site_info = "YouTube.com"
|
|
|
|
# Module-level alias: ``download`` is the URL-based entry point.
download = youtube_download
|
|
|
|
# Whatever playlist_not_supported('youtube') returns -- presumably a stub that
# reports playlists as unsupported; confirm against its definition.
# NOTE(review): youtube_download() already handles playlist URLs through
# youtube_list_download_by_id(); check whether this stub is still intended.
download_playlist = playlist_not_supported('youtube')
|