From 8389164081c357ce97b50459924328785debc805 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 17 Sep 2012 13:11:46 +0200
Subject: [PATCH] fix #2, #4: parse YouTube signature

---
Review notes (this area, between the three-dash separator and the first
"diff --git" line, is the conventional place for patch commentary).

NOTE(review): this patch was received with its line breaks and indentation
collapsed. The line structure below is reconstructed; every hunk token and
every hunk count is unchanged, but the indentation and the blank context
lines are a best guess. Regenerate the patch from the repository before
applying it.

What the patch does:

* common.py: adds unicodize(text), which replaces every backslash-u escape
  followed by four hex digits with the character of that code point
  (chr(int(..., 16))).
* common.py, script_main: a leading 'https://' is cut off (8 characters);
  the existing check that follows then prepends 'http://', so an https URL
  ends up being requested over plain http.
* youtube.py, youtube_download_by_id: the watch-page fallback now keeps the
  page text in `html`. When the extracted url does not start with
  'http://', it reads a host name (`codec`) from the
  yt.preload.start(".../crossdomain.xml") call and a `signature=` value
  from the page, then takes the first 'url' entry that starts with
  'http://' + codec and appends '&signature=<signature>' to it.
* youtube.py, youtube_download: the title is passed through unicodize()
  before the existing `assert title`.

Open questions for the author:

* r1() is defined outside this patch. If it can return None when nothing
  matches (TODO confirm), then 'http://' + codec raises TypeError, and a
  missing signature is formatted into the URL as the text "None".
* If no 'url' entry starts with 'http://' + codec, the loop leaves `url`
  at the value that already failed the 'http://' check.
* unicodize(title) runs before `assert title`; re.sub() rejects None, so
  if r1() returned None the assert is never reached.
* The signature lookup is assumed to sit inside the `except:` branch,
  because `html` is only assigned there; confirm the real indentation.

 you_get/common.py             |  5 +++++
 you_get/downloader/youtube.py | 13 ++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/you_get/common.py b/you_get/common.py
index ef1f7318..cc50c68f 100644
--- a/you_get/common.py
+++ b/you_get/common.py
@@ -43,6 +43,9 @@ def r1_of(patterns, text):
         if x:
             return x
 
+def unicodize(text):
+    return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)
+
 def escape_file_path(path):
     path = path.replace('/', '-')
     path = path.replace('\\', '-')
@@ -572,6 +575,8 @@ def script_main(script_name, download, download_playlist = None):
         set_http_proxy(proxy)
 
     for url in args:
+        if url.startswith('https://'):
+            url = url[8:]
         if not url.startswith('http://'):
             url = 'http://' + url
 
diff --git a/you_get/downloader/youtube.py b/you_get/downloader/youtube.py
index 4b12f165..918508cc 100644
--- a/you_get/downloader/youtube.py
+++ b/you_get/downloader/youtube.py
@@ -8,7 +8,17 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
     try:
         url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/get_video_info?&video_id=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:]
     except:
-        url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:]
+        html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
+        url = parse.parse_qs(parse.unquote(html))['url_encoded_fmt_stream_map'][0][4:]
+        if not url.startswith('http://'):
+            codec = r1(r'yt.preload.start\("http:\\/\\/([^\\]+)\\/crossdomain.xml"\)', html)
+            signature = r1(r'signature=([^\\]+)\\', html)
+            urls = parse.parse_qs(parse.unquote(html))['url']
+            for u in urls:
+                if u.startswith('http://' + codec):
+                    url = "%s&signature=%s" % (u, signature)
+                    break
+
     type, ext, size = url_info(url)
 
     print_info(site_info, title, type, size)
@@ -23,6 +33,7 @@ def youtube_download(url, output_dir = '.', merge = True, info_only = False):
     except:
         html = get_html(url, 'utf-8')
         title = r1(r'"title": "([^"]+)"', html)
+    title = unicodize(title)
     assert title
     title = parse.unquote(title)
     title = escape_file_path(title)