YouTube: fix #135

This commit is contained in:
Mort Yao 2013-03-22 04:24:01 +01:00
parent 0929e4265e
commit df740c10e2

View File

@ -4,13 +4,95 @@ __all__ = ['youtube_download', 'youtube_download_by_id']
from ..common import * from ..common import *
import json # YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
# Known YouTube stream formats, one dict per itag.
# Row order is significant: the position of an entry in this list is used as
# its quality rank (index 0 is the most preferred format).
# Each row below is expanded into a dict keyed by the field names given in the
# first argument to zip(); empty strings mean the value is not recorded.
youtube_codecs = [
    dict(zip(
        ('itag', 'container', 'video_resolution', 'video_encoding',
         'video_profile', 'video_bitrate', 'audio_encoding', 'audio_bitrate'),
        row))
    for row in (
        (38, 'MP4', '3072p', 'H.264', 'High', '3.5-5', 'AAC', '192'),
        (46, 'WebM', '1080p', 'VP8', '', '', 'Vorbis', '192'),
        (37, 'MP4', '1080p', 'H.264', 'High', '3-4.3', 'AAC', '192'),
        (102, '', '', 'VP8', '', '2', 'Vorbis', '192'),
        (45, 'WebM', '720p', '', '', '', '', ''),
        (22, 'MP4', '720p', 'H.264', 'High', '2-2.9', 'AAC', '192'),
        (84, 'MP4', '720p', 'H.264', '3D', '2-2.9', 'AAC', '152'),
        (120, 'FLV', '720p', 'AVC', 'Main@L3.1', '2', 'AAC', '128'),
        (85, 'MP4', '520p', 'H.264', '3D', '2-2.9', 'AAC', '152'),
        (44, 'WebM', '480p', 'VP8', '', '1', 'Vorbis', '128'),
        (35, 'FLV', '480p', 'H.264', 'Main', '0.8-1', 'AAC', '128'),
        (101, 'WebM', '360p', 'VP8', '3D', '', 'Vorbis', '192'),
        (100, 'WebM', '360p', 'VP8', '3D', '', 'Vorbis', '128'),
        (43, 'WebM', '360p', 'VP8', '', '0.5', 'Vorbis', '128'),
        (34, 'FLV', '360p', 'H.264', 'Main', '0.5', 'AAC', '128'),
        (82, 'MP4', '360p', 'H.264', '3D', '0.5', 'AAC', '96'),
        (18, 'MP4', '270p/360p', 'H.264', 'Baseline', '0.5', 'AAC', '96'),
        (6, 'FLV', '270p', 'Sorenson H.263', '', '0.8', 'MP3', '64'),
        (83, 'MP4', '240p', 'H.264', '3D', '0.5', 'AAC', '96'),
        (13, '3GP', '', 'MPEG-4 Visual', '', '0.5', 'AAC', ''),
        (5, 'FLV', '240p', 'Sorenson H.263', '', '0.25', 'MP3', '64'),
        (36, '3GP', '240p', 'MPEG-4 Visual', 'Simple', '0.17', 'AAC', '38'),
        (17, '3GP', '144p', 'MPEG-4 Visual', 'Simple', '0.05', 'AAC', '24'),
    )
]
def parse_video_info(raw_info):
    """Parse the raw body of a YouTube get_video_info response.

    raw_info is treated as a sequence of 'key=value' pairs joined by '&'.
    No percent-decoding is performed here: every key and value is returned
    exactly as it appears in raw_info.

    The 'url_encoded_fmt_stream_map' value gets special treatment. It is
    split on '%2C' into streams, each stream is split on '%26' into fields,
    and each field is split on '%3D' into a key and a value. The resulting
    list of stream dicts is sorted by the position of the stream's 'itag'
    in youtube_codecs, so the most preferred available stream comes first.

    Returns:
        A dict mapping each key to its raw string value, except
        'url_encoded_fmt_stream_map', which maps to the sorted list of
        stream dicts described above.

    Malformed input is tolerated rather than raising: empty segments (for
    example from a trailing '&') are skipped, a field without a separator
    maps to '', and streams whose itag is missing or not listed in
    youtube_codecs are kept and sorted after all known ones.
    """
    # itag (as a string, matching the parsed data) -> quality rank.
    # A lower rank means a more preferred format; rank 0 is the first entry
    # of youtube_codecs.
    fmt_level = {
        str(codec['itag']): level
        for level, codec in enumerate(youtube_codecs)
    }
    # Rank given to streams that cannot be matched against youtube_codecs;
    # one past the last known rank, so they end up at the tail of the list.
    unknown_level = len(youtube_codecs)

    def parse_stream(encoded_stream):
        """Turn one '%26'-joined, '%3D'-separated stream into a dict."""
        stream = {}
        for field in encoded_stream.split('%26'):
            if not field:
                continue
            # partition() splits on the first separator only and never
            # yields the wrong number of parts, unlike split().
            key, _, value = field.partition('%3D')
            stream[key] = value
        return stream

    def stream_level(stream):
        """Sort key: quality rank of a stream, unknown formats last."""
        return fmt_level.get(stream.get('itag'), unknown_level)

    video_info = {}
    for item in raw_info.split('&'):
        if not item:
            continue
        key, _, value = item.partition('=')
        if key == 'url_encoded_fmt_stream_map':
            streams = [
                parse_stream(encoded_stream)
                for encoded_stream in value.split('%2C')
                if encoded_stream
            ]
            # sorted() is stable, so streams of equal rank keep the order
            # in which they appeared in raw_info.
            value = sorted(streams, key=stream_level)
        video_info[key] = value
    return video_info
def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8')
video_info = parse_video_info(raw_info)
if video_info['status'] == 'ok': # use get_video_info data
title = parse.unquote(video_info['title'].replace('+', ' '))
signature = video_info['url_encoded_fmt_stream_map'][0]['sig']
url = parse.unquote(parse.unquote(video_info['url_encoded_fmt_stream_map'][0]['url'])) + "&signature=%s" % signature
else: # parse video page when "embedding disabled by request"
import json
html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
html = unescape_html(html) html = unescape_html(html)
yt_player_config = json.loads(r1(r'yt.playerConfig = ([^\n]+);\n', html)) yt_player_config = json.loads(r1(r'ytplayer.config = ([^\n]+);', html))
title = yt_player_config['args']['title'] title = yt_player_config['args']['title']
title = unicodize(title) title = unicodize(title)
title = parse.unquote(title) title = parse.unquote(title)
@ -25,10 +107,7 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
'85', '85',
'44', '35', '44', '35',
'101', '100', '43', '34', '82', '18', '101', '100', '43', '34', '82', '18',
'6', '6', '83', '13', '5', '36', '17',
'83', '5', '36',
'17',
'13',
]: ]:
fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html) fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html)
if fmt: if fmt:
@ -41,7 +120,7 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
try: try:
url url
except NameError: except NameError:
url = r1(r'crossdomain.xml"\);yt.preload.start\("([^"]+)"\)', html) url = r1(r'ytdns.ping\("([^"]+)"[^;]*;</script>', html)
url = unicodize(url) url = unicodize(url)
url = re.sub(r'\\/', '/', url) url = re.sub(r'\\/', '/', url)
url = re.sub(r'generate_204', 'videoplayback', url) url = re.sub(r'generate_204', 'videoplayback', url)