YouTube: fix #135

2025-03-13 11:24:02 +03:00 · 2013-03-22 04:24:01 +01:00 · 2013-03-22 04:24:01 +01:00 · df740c10e2
commit df740c10e2
parent 0929e4265e
1 changed files with 115 additions and 36 deletions
--- a/src/you_get/downloader/youtube.py
+++ b/src/you_get/downloader/youtube.py
@ -4,13 +4,95 @@ __all__ = ['youtube_download', 'youtube_download_by_id']
 from ..common import *
-import json
+# YouTube media encoding options, in descending quality order.
 # taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
 youtube_codecs = [
    {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 102, 'container': '', 'video_resolution': '', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': '', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': '', 'audio_bitrate': ''},
    {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
    {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'AVC', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 85, 'container': 'MP4', 'video_resolution': '520p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
    {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 43, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 34, 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 18, 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 6, 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
    {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''},
    {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
    {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.17', 'audio_encoding': 'AAC', 'audio_bitrate': '38'},
    {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
 ]
 def parse_video_info(raw_info):
    """Parse the raw response text of YouTube's get_video_info request.

    raw_info is a query string: '&'-separated 'key=value' items. The value
    of the 'url_encoded_fmt_stream_map' item is itself a percent-encoded
    list: streams are separated by '%2C' (an encoded ','), and inside each
    stream the fields are separated by '%26' (an encoded '&') with '%3D'
    (an encoded '=') between a field name and its value.

    Returns a dict mapping each key to its string value exactly as it
    appears in raw_info (no percent-decoding is done here), except for
    'url_encoded_fmt_stream_map', which maps to a list of per-stream dicts
    sorted from highest to lowest quality following the order of
    youtube_codecs. Streams whose 'itag' is missing or not listed in
    youtube_codecs are kept and placed after all known ones.

    Empty items are ignored and an item without a separator maps its key
    to '', so blank or slightly malformed input no longer raises.
    """
    def split_pairs(text, item_sep, kv_sep):
        # Split each item only on the FIRST kv_sep so that a value which
        # itself contains the separator stays intact, and skip empty items
        # (blank input, doubled or trailing separators).
        pairs = {}
        for item in text.split(item_sep):
            if item:
                key, _, value = item.partition(kv_sep)
                pairs[key] = value
        return pairs

    # Rank of every known itag; 0 is the highest quality. Keys are strings
    # because the itag values parsed from the response are strings.
    fmt_level = {str(codec['itag']): level
                 for level, codec in enumerate(youtube_codecs)}
    # Rank used for streams that cannot be looked up: after every known one.
    unknown_level = len(youtube_codecs)

    video_info = split_pairs(raw_info, '&', '=')

    stream_map = video_info.get('url_encoded_fmt_stream_map')
    if stream_map is not None:
        streams = [split_pairs(stream, '%26', '%3D')
                   for stream in stream_map.split('%2C')
                   if stream]
        # list.sort() is stable, so streams sharing a rank (for example
        # several unknown itags) keep the order they had in the response.
        streams.sort(
            key=lambda stream:
                fmt_level.get(stream.get('itag'), unknown_level))
        video_info['url_encoded_fmt_stream_map'] = streams

    return video_info
 def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
    raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8')
    video_info = parse_video_info(raw_info)
    if video_info['status'] == 'ok': # use get_video_info data
        title = parse.unquote(video_info['title'].replace('+', ' '))
        signature = video_info['url_encoded_fmt_stream_map'][0]['sig']
        url = parse.unquote(parse.unquote(video_info['url_encoded_fmt_stream_map'][0]['url'])) + "&signature=%s" % signature
    else: # parse video page when "embedding disabled by request"
        import json
        html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
        html = unescape_html(html)
-    yt_player_config = json.loads(r1(r'yt.playerConfig = ([^\n]+);\n', html))
+        yt_player_config = json.loads(r1(r'ytplayer.config = ([^\n]+);', html))
        title = yt_player_config['args']['title']
        title = unicodize(title)
        title = parse.unquote(title)
@ -25,10 +107,7 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
            '85',
            '44', '35',
            '101', '100', '43', '34', '82', '18',
-        '6',
+            '6', '83', '13', '5', '36', '17',
        '83', '5', '36',
        '17',
        '13',
        ]:
            fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html)
            if fmt:
@ -41,7 +120,7 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
        try:
            url
        except NameError:
-        url = r1(r'crossdomain.xml"\);yt.preload.start\("([^"]+)"\)', html)
+            url = r1(r'ytdns.ping\("([^"]+)"[^;]*;</script>', html)
            url = unicodize(url)
            url = re.sub(r'\\/', '/', url)
            url = re.sub(r'generate_204', 'videoplayback', url)