fix #2, #4: parse YouTube signature

2025-02-02 16:24:00 +03:00 · 2012-09-17 13:11:46 +02:00 · 2012-09-17 13:11:46 +02:00 · 8389164081
commit 8389164081
parent 4cb24e07d2
2 changed files with 17 additions and 1 deletions
--- a/you_get/common.py
+++ b/you_get/common.py
@ -43,6 +43,9 @@ def r1_of(patterns, text):
        if x:
            return x
 def unicodize(text):
     r"""Decode literal ``\uXXXX`` escape sequences found in *text*.

     Every backslash-u escape followed by exactly four hexadecimal digits is
     replaced by the character it denotes; all other text is returned
     unchanged.

     A high surrogate escape (``\uD800``-``\uDBFF``) immediately followed by
     a low surrogate escape (``\uDC00``-``\uDFFF``) is the UTF-16 spelling of
     one code point above U+FFFF (for example an emoji).  Such a pair is
     combined into that single character instead of being emitted as two
     lone surrogates, which cannot be encoded when the result is later
     printed or used as a file name.

     Args:
         text: String possibly containing ``\uXXXX`` escapes.

     Returns:
         The string with the escapes replaced by the characters they encode.
     """
     def decode(match):
         high, low, single = match.groups()
         if single is not None:
             # Ordinary escape (or an unpaired surrogate, kept as-is).
             return chr(int(single, 16))
         # Surrogate pair: rebuild the supplementary-plane code point.
         return chr(0x10000
                    + ((int(high, 16) - 0xD800) << 10)
                    + (int(low, 16) - 0xDC00))

     # The pair alternative is listed first so it wins over the generic
     # single-escape alternative whenever a full pair is present.
     return re.sub(
         r'\\u([Dd][89ABab][0-9A-Fa-f]{2})\\u([Dd][C-Fc-f][0-9A-Fa-f]{2})'
         r'|\\u([0-9A-Fa-f]{4})',
         decode,
         text)
 def escape_file_path(path):
    path = path.replace('/', '-')
    path = path.replace('\\', '-')
@ -572,6 +575,8 @@ def script_main(script_name, download, download_playlist = None):
    set_http_proxy(proxy)
    for url in args:
        if url.startswith('https://'):
            url = url[8:]
        if not url.startswith('http://'):
            url = 'http://' + url
--- a/you_get/downloader/youtube.py
+++ b/you_get/downloader/youtube.py
@ -8,7 +8,17 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
    try:
        url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/get_video_info?&video_id=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:]
    except:
-        url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:]
+        html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
        url = parse.parse_qs(parse.unquote(html))['url_encoded_fmt_stream_map'][0][4:]
        if not url.startswith('http://'):
            codec = r1(r'yt.preload.start\("http:\\/\\/([^\\]+)\\/crossdomain.xml"\)', html)
            signature = r1(r'signature=([^\\]+)\\', html)
            urls = parse.parse_qs(parse.unquote(html))['url']
            for u in urls:
                if u.startswith('http://' + codec):
                    url = "%s&signature=%s" % (u, signature)
                    break
    type, ext, size = url_info(url)
    print_info(site_info, title, type, size)
@ -23,6 +33,7 @@ def youtube_download(url, output_dir = '.', merge = True, info_only = False):
    except:
        html = get_html(url, 'utf-8')
        title = r1(r'"title": "([^"]+)"', html)
        title = unicodize(title)
    assert title
    title = parse.unquote(title)
    title = escape_file_path(title)