From 0e17ee6faf2a3a00ed09e5a8adf3d91d98961332 Mon Sep 17 00:00:00 2001 From: codepongo Date: Fri, 24 May 2013 14:36:29 +0800 Subject: [PATCH 01/64] Xiami: fix replace ? to - in lyric file name --- src/you_get/downloader/xiami.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index 937893f3..a7b06101 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -30,7 +30,7 @@ def location_dec(str): def xiami_download_lyric(lrc_url, file_name, output_dir): lrc = get_html(lrc_url, faker = True) if len(lrc) > 0: - with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w', encoding='utf-8') as x: + with open(output_dir + "/" + file_name.replace('/', '-').replace('?', '-') + '.lrc', 'w', encoding='utf-8') as x: x.write(lrc) def xiami_download_pic(pic_url, file_name, output_dir): From 883a7d4576a2135780ac52b55efe79265cafe02e Mon Sep 17 00:00:00 2001 From: codepongo Date: Tue, 4 Jun 2013 10:52:09 +0800 Subject: [PATCH 02/64] revert xiami --- src/you_get/downloader/xiami.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index a7b06101..c45079d4 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -67,6 +67,7 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = xml = get_html('http://www.xiami.com/song/playlist/id/%s/type/3' % cid, faker = True) doc = parseString(xml) + album_name = album_name.replace(':', ' ') output_dir = output_dir + "/" + "[" + collect_name + "]" tracks = doc.getElementsByTagName("track") track_nr = 1 From 52665f342c8307928f9de9249dfb738e8ffa05bb Mon Sep 17 00:00:00 2001 From: codepongo Date: Tue, 4 Jun 2013 13:17:34 +0800 Subject: [PATCH 03/64] album name with colon --- src/you_get/downloader/xiami.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index c45079d4..127da043 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -67,7 +67,6 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = xml = get_html('http://www.xiami.com/song/playlist/id/%s/type/3' % cid, faker = True) doc = parseString(xml) - album_name = album_name.replace(':', ' ') output_dir = output_dir + "/" + "[" + collect_name + "]" tracks = doc.getElementsByTagName("track") track_nr = 1 @@ -94,6 +93,7 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False) album_name = r1(r'', xml) artist = r1(r'', xml) doc = parseString(xml) + album_name = album_name.replace(':', ' ') output_dir = output_dir + "/%s - %s" % (artist, album_name) tracks = doc.getElementsByTagName("track") track_nr = 1 From 2b637b68c547127ba790950fddcaef6f0157de54 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 5 Jun 2013 00:18:44 +0200 Subject: [PATCH 04/64] Google+: fix #183, fix #189, fix #193 --- src/you_get/downloader/google.py | 61 ++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/src/you_get/downloader/google.py b/src/you_get/downloader/google.py index cd02697f..0193db2f 100644 --- a/src/you_get/downloader/google.py +++ b/src/you_get/downloader/google.py @@ -6,6 +6,40 @@ from ..common import * import re +# YouTube media encoding options, in descending quality order. +# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013. +youtube_codecs = [ + {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, + {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + {'itag': 102, 'container': '', 'video_resolution': '', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, + {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': '', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': '', 'audio_bitrate': ''}, + {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'}, + {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'AVC', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, + {'itag': 85, 'container': 'MP4', 'video_resolution': '520p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'}, + {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, + {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, + {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, + {'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, + {'itag': 43, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, + {'itag': 34, 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, + {'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, + {'itag': 18, 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, + {'itag': 6, 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'}, + {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, + {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''}, + {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'}, + {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.17', 'audio_encoding': 'AAC', 'audio_bitrate': '38'}, + {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'}, +] +fmt_level = dict( + zip( + [str(codec['itag']) + for codec in + youtube_codecs], + range(len(youtube_codecs)))) + def google_download(url, output_dir = '.', merge = True, info_only = False): # Percent-encoding Unicode URL url = parse.quote(url, safe = ':/+%') @@ -14,25 +48,22 @@ def google_download(url, output_dir = '.', merge = True, info_only = False): if service == 'plus': # Google Plus - if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url): - oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url) - pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url) - - elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url): - oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url) - pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url) - - else: + if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url): html = get_html(url) - oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html) - pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html) - - url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid) + url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html) + title = r1(r'([^<\n]+)', html) + else: + title = None html = get_html(url) - real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/')) + real_urls = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html) + real_url = unicodize(sorted(real_urls, key = lambda x : fmt_level[x[0]])[0][1]) + + if title is None: + post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html) + post_html = get_html(post_url) + title = r1(r'<title>([^<\n]+)', post_html) - title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html) if title is None: response = request.urlopen(request.Request(real_url)) if response.headers['content-disposition']: From c8508ca6bfdbb4df58e30b9cbed5d5014bd2f30f Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 7 Jun 2013 00:32:00 +0200 Subject: [PATCH 05/64] Sina: fix #186 --- src/you_get/downloader/sina.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/sina.py b/src/you_get/downloader/sina.py index 741b9f77..b533d8dc 100644 --- a/src/you_get/downloader/sina.py +++ b/src/you_get/downloader/sina.py @@ -27,7 +27,7 @@ def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_o download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) def sina_download(url, output_dir = '.', merge = True, info_only = False): - id = r1(r'[^_]vid\s*:\s*\'([^\']+)\',', get_html(url)).split('|')[-1] + id = r1(r'vid=(\d+)', url) or r1(r'[^_]vid\s*:\s*\'([^\']+)\',', get_html(url)).split('|')[-1] assert id sina_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only) From ff59ca64377adf801e875cf16961565294b4543c Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 7 Jun 2013 01:22:51 +0200 Subject: [PATCH 06/64] add support for Baidu Wangpan, fix #177 --- README.md | 4 +++- README.txt | 3 ++- src/you_get/common.py | 2 +- src/you_get/downloader/baidu.py | 19 ++++++++++++++++--- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7579f81a..055a3903 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,8 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * Sohu (搜狐视频) <http://tv.sohu.com> * 56 (56网) <http://www.56.com> * Xiami (虾米) <http://www.xiami.com> -* Baidu (百度音乐) <http://music.baidu.com> +* Baidu Music (百度音乐) <http://music.baidu.com> +* Baidu Wangpan (百度网盘) <http://pan.baidu.com> * SongTaste <http://www.songtaste.com> ## Dependencies @@ -264,6 +265,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * 56网 <http://www.56.com> * 虾米 <http://www.xiami.com> * 百度音乐 <http://music.baidu.com> +* 百度网盘 <http://pan.baidu.com> * SongTaste <http://www.songtaste.com> ## 依赖 diff --git a/README.txt b/README.txt index d90bd5d1..afb9e45e 100644 --- a/README.txt +++ b/README.txt @@ -50,7 +50,8 @@ Supported Sites (As of Now) * Sohu (搜狐视频) http://tv.sohu.com * 56 (56网) http://www.56.com * Xiami (虾米) http://www.xiami.com -* Baidu (百度音乐) http://music.baidu.com +* Baidu Music (百度音乐) http://music.baidu.com +* Baidu Wangpan (百度网盘) http://pan.baidu.com * SongTaste http://www.songtaste.com Dependencies diff --git a/src/you_get/common.py b/src/you_get/common.py index 7f708511..54d082af 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -136,7 +136,7 @@ def url_info(url, faker = False): type = None if headers['content-disposition']: try: - filename = parse.unquote(r1(r'filename="?(.+)"?', headers['content-disposition'])) + filename = parse.unquote(r1(r'filename="?([^"]+)"?', headers['content-disposition'])) if len(filename.split('.')) > 1: ext = filename.split('.')[-1] else: diff --git a/src/you_get/downloader/baidu.py b/src/you_get/downloader/baidu.py index 245b1d4f..405e0b78 100755 --- a/src/you_get/downloader/baidu.py +++ b/src/you_get/downloader/baidu.py @@ -68,12 +68,25 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False) track_nr += 1 def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): - - if re.match(r'http://music.baidu.com/album/\d+', url): + if re.match(r'http://pan.baidu.com', url): + html = get_html(url) + + title = r1(r'server_filename="([^"]+)"', html) + if len(title.split('.')) > 1: + title = ".".join(title.split('.')[:-1]) + + real_url = r1(r'href="([^"]+)" id="downFileButtom"', html).replace('&', '&') + type, ext, size = url_info(real_url, faker = True) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([real_url], title, ext, size, output_dir, merge = merge) + + elif re.match(r'http://music.baidu.com/album/\d+', url): id = r1(r'http://music.baidu.com/album/(\d+)', url) baidu_download_album(id, output_dir, merge, info_only) - if re.match('http://music.baidu.com/song/\d+', url): + elif re.match('http://music.baidu.com/song/\d+', url): id = r1(r'http://music.baidu.com/song/(\d+)', url) baidu_download_song(id, output_dir, merge, info_only) From 5d5d085506525d7d4a5a0eae48734c4d9e423f4d Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 7 Jun 2013 01:30:00 +0200 Subject: [PATCH 07/64] version 0.3.13 --- CHANGELOG.txt | 12 +++++++++++- src/you_get/version.py | 4 ++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 264971d6..0fa901e8 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,16 @@ Changelog ========= +0.3.13 +------ + +*Date: 2013-06-07* + +* Add support for: + - Baidu Wangpan (video only) +* Fix issue for: + - Google+ + 0.3.12 ------ @@ -86,7 +96,7 @@ Changelog * Add support for: - Douban - MioMio -* Fix issue for: +* Fix issues for: - Tudou - Vimeo diff --git a/src/you_get/version.py b/src/you_get/version.py index 8b7c8a81..2700f682 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] -__version__ = '0.3.12' -__date__ = '2013-05-19' +__version__ = '0.3.13' +__date__ = '2013-06-07' From f302540303f676a6ff0005c30ff967524461cfa6 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 12 Jun 2013 16:53:33 +0200 Subject: [PATCH 08/64] AcFun: fix #196 --- src/you_get/downloader/acfun.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/downloader/acfun.py b/src/you_get/downloader/acfun.py index aa880bee..684fec2f 100644 --- a/src/you_get/downloader/acfun.py +++ b/src/you_get/downloader/acfun.py @@ -16,7 +16,7 @@ def get_srt_json(id): return get_html(url) def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - info = json.loads(get_html('http://www.acfun.tv/api/getVideoByID.aspx?vid=' + id)) + info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id)) t = info['vtype'] vid = info['vid'] if t == 'sina': @@ -37,7 +37,7 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ x.write(cmt) def acfun_download(url, output_dir = '.', merge = True, info_only = False): - assert re.match(r'http://www.acfun.tv/v/ac(\d+)', url) + assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url) html = get_html(url) title = r1(r'<h1 id="title-article" class="title"[^<>]*>([^<>]+)<', html) From fc24a267b598ace59954461b661622c0ca70370b Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 13 Jun 2013 00:12:45 +0200 Subject: [PATCH 09/64] add support for Alive.in.th, fix #190 --- README.md | 2 ++ README.txt | 1 + src/you_get/__main__.py | 1 + src/you_get/downloader/__init__.py | 1 + src/you_get/downloader/alive.py | 21 +++++++++++++++++++++ 5 files changed, 26 insertions(+) create mode 100644 src/you_get/downloader/alive.py diff --git a/README.md b/README.md index 055a3903..03bc2a1c 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * Baidu Music (百度音乐) <http://music.baidu.com> * Baidu Wangpan (百度网盘) <http://pan.baidu.com> * SongTaste <http://www.songtaste.com> +* Alive.in.th <http://alive.in.th> ## Dependencies @@ -267,6 +268,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * 百度音乐 <http://music.baidu.com> * 百度网盘 <http://pan.baidu.com> * SongTaste <http://www.songtaste.com> +* Alive.in.th <http://alive.in.th> ## 依赖 diff --git a/README.txt b/README.txt index afb9e45e..4fe437af 100644 --- a/README.txt +++ b/README.txt @@ -53,6 +53,7 @@ Supported Sites (As of Now) * Baidu Music (百度音乐) http://music.baidu.com * Baidu Wangpan (百度网盘) http://pan.baidu.com * SongTaste http://www.songtaste.com +* Alive.in.th http://alive.in.th Dependencies ------------ diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index 568082f5..0e6c4f43 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -33,6 +33,7 @@ def url_to_module(url): 'google': google, 'iask': sina, 'ifeng': ifeng, + 'in': alive, 'iqiyi': iqiyi, 'joy': joy, 'jpopsuki': jpopsuki, diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 0b85ad8a..7bab2a60 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from .acfun import * +from .alive import * from .baidu import * from .bilibili import * from .blip import * diff --git a/src/you_get/downloader/alive.py b/src/you_get/downloader/alive.py new file mode 100644 index 00000000..33764c72 --- /dev/null +++ b/src/you_get/downloader/alive.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +__all__ = ['alive_download'] + +from ..common import * + +def alive_download(url, output_dir = '.', merge = True, info_only = False): + html = get_html(url) + + title = r1(r'<meta property="og:title" content="([^"]+)"', html) + + url = r1(r'file: "(http://alive[^"]+)"', html) + type, ext, size = url_info(url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge = merge) + +site_info = "Alive.in.th" +download = alive_download +download_playlist = playlist_not_supported('alive') From 3b21acfe4b720dce2ebdf11b16db81905d885dfa Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 13 Jun 2013 16:28:05 +0200 Subject: [PATCH 10/64] iQIYI: fix #197 --- src/you_get/downloader/iqiyi.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/you_get/downloader/iqiyi.py b/src/you_get/downloader/iqiyi.py index c8693dbf..5c951d1d 100644 --- a/src/you_get/downloader/iqiyi.py +++ b/src/you_get/downloader/iqiyi.py @@ -6,13 +6,8 @@ from ..common import * def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - #title = r1(r'title\s*:\s*"([^"]+)"', html) - #title = unescape_html(title).decode('utf-8') - #videoId = r1(r'videoId\s*:\s*"([^"]+)"', html) - #pid = r1(r'pid\s*:\s*"([^"]+)"', html) - #ptype = r1(r'ptype\s*:\s*"([^"]+)"', html) - #info_url = 'http://cache.video.qiyi.com/v/%s/%s/%s/' % (videoId, pid, ptype) - videoId = r1(r'''["']videoId["'][:=]["']([^"']+)["']''', html) + + videoId = r1(r'data-player-videoid="([^"]+)"', html) assert videoId info_url = 'http://cache.video.qiyi.com/v/%s' % videoId From a0d971de457f8b96cfc722de49137216ab30f09d Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 13 Jun 2013 16:50:04 +0200 Subject: [PATCH 11/64] remove support of JPopsuki --- README.md | 2 -- README.txt | 1 - src/you_get/__main__.py | 1 - src/you_get/downloader/__init__.py | 1 - src/you_get/downloader/jpopsuki.py | 23 ----------------------- 5 files changed, 28 deletions(-) delete mode 100644 src/you_get/downloader/jpopsuki.py diff --git a/README.md b/README.md index 03bc2a1c..d913ca3e 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * SoundCloud <http://soundcloud.com> * Mixcloud <http://www.mixcloud.com> * Freesound <http://www.freesound.org> -* JPopsuki <http://jpopsuki.tv> * VID48 <http://vid48.com> * Niconico (ニコニコ動画) <http://www.nicovideo.jp> * Youku (优酷) <http://www.youku.com> @@ -243,7 +242,6 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * SoundCloud <http://soundcloud.com> * Mixcloud <http://www.mixcloud.com> * Freesound <http://www.freesound.org> -* JPopsuki <http://jpopsuki.tv> * VID48 <http://vid48.com> * NICONICO动画 <http://www.nicovideo.jp> * 优酷 <http://www.youku.com> diff --git a/README.txt b/README.txt index 4fe437af..88572c34 100644 --- a/README.txt +++ b/README.txt @@ -28,7 +28,6 @@ Supported Sites (As of Now) * SoundCloud http://soundcloud.com * Mixcloud http://www.mixcloud.com * Freesound http://www.freesound.org -* JPopsuki http://jpopsuki.tv * VID48 http://vid48.com * Niconico (ニコニコ動画) http://www.nicovideo.jp * Youku (优酷) http://www.youku.com diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index 0e6c4f43..bf23019d 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -36,7 +36,6 @@ def url_to_module(url): 'in': alive, 'iqiyi': iqiyi, 'joy': joy, - 'jpopsuki': jpopsuki, 'kankanews': bilibili, 'ku6': ku6, 'miomio': miomio, diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 7bab2a60..2cb2588d 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -15,7 +15,6 @@ from .google import * from .ifeng import * from .iqiyi import * from .joy import * -from .jpopsuki import * from .ku6 import * from .miomio import * from .mixcloud import * diff --git a/src/you_get/downloader/jpopsuki.py b/src/you_get/downloader/jpopsuki.py deleted file mode 100644 index a88b23c2..00000000 --- a/src/you_get/downloader/jpopsuki.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['jpopsuki_download'] - -from ..common import * - -def jpopsuki_download(url, output_dir = '.', merge = True, info_only = False): - html = get_html(url) - - title = r1(r'<meta name="title" content="([^"]*)"', html) - if title.endswith(' - JPopsuki TV'): - title = title[:-14] - - url = "http://jpopsuki.tv%s" % r1(r'<source src="([^"]*)"', html) - type, ext, size = url_info(url) - - print_info(site_info, title, type, size) - if not info_only: - download_urls([url], title, ext, size, output_dir, merge = merge) - -site_info = "JPopsuki.tv" -download = jpopsuki_download -download_playlist = playlist_not_supported('jpopsuki') From 2cc7fb5e8aa6cb5410cc6200fc873edde2dbe1a5 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 13 Jun 2013 17:12:37 +0200 Subject: [PATCH 12/64] version 0.3.14 --- CHANGELOG.txt | 13 +++++++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 0fa901e8..ac4e1009 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,19 @@ Changelog ========= +0.3.14 +------ + +*Date: 2013-06-14* + +* Add support for: + - Alive.in.th +* Remove support of: + - JPopsuki +* Fix issues for: + - AcFun + - iQIYI + 0.3.13 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 2700f682..d8b68a52 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] -__version__ = '0.3.13' -__date__ = '2013-06-07' +__version__ = '0.3.14' +__date__ = '2013-06-14' From 22ffb14ca2aee7fee8e2619739e336c5add919ae Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 20 Jun 2013 21:13:35 +0200 Subject: [PATCH 13/64] add support for Instagram, fix #200 --- README.md | 2 ++ README.txt | 1 + src/you_get/__main__.py | 1 + src/you_get/downloader/__init__.py | 1 + src/you_get/downloader/instagram.py | 22 ++++++++++++++++++++++ 5 files changed, 27 insertions(+) create mode 100644 src/you_get/downloader/instagram.py diff --git a/README.md b/README.md index d913ca3e..c8f6bf05 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * Google Drive <http://docs.google.com> * Tumblr <http://www.tumblr.com> * Vine <http://vine.co> +* Instagram <http://instagram.com> * SoundCloud <http://soundcloud.com> * Mixcloud <http://www.mixcloud.com> * Freesound <http://www.freesound.org> @@ -239,6 +240,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * Google Drive <http://docs.google.com> * Tumblr <http://www.tumblr.com> * Vine <http://vine.co> +* Instagram <http://instagram.com> * SoundCloud <http://soundcloud.com> * Mixcloud <http://www.mixcloud.com> * Freesound <http://www.freesound.org> diff --git a/README.txt b/README.txt index 88572c34..c544103a 100644 --- a/README.txt +++ b/README.txt @@ -25,6 +25,7 @@ Supported Sites (As of Now) * Google Drive http://docs.google.com * Tumblr http://www.tumblr.com * Vine http://vine.co +* Instagram http://instagram.com * SoundCloud http://soundcloud.com * Mixcloud http://www.mixcloud.com * Freesound http://www.freesound.org diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index bf23019d..00ea834f 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -34,6 +34,7 @@ def url_to_module(url): 'iask': sina, 'ifeng': ifeng, 'in': alive, + 'instagram': instagram, 'iqiyi': iqiyi, 'joy': joy, 'kankanews': bilibili, diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 2cb2588d..644b1bc6 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -13,6 +13,7 @@ from .facebook import * from .freesound import * from .google import * from .ifeng import * +from .instagram import * from .iqiyi import * from .joy import * from .ku6 import * diff --git a/src/you_get/downloader/instagram.py b/src/you_get/downloader/instagram.py new file mode 100644 index 00000000..f73e23fe --- /dev/null +++ b/src/you_get/downloader/instagram.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +__all__ = ['instagram_download'] + +from ..common import * + +def instagram_download(url, output_dir = '.', merge = True, info_only = False): + html = get_html(url) + + id = r1(r'instagram.com/p/([^/]+)/', html) + description = r1(r'<meta property="og:description" content="([^"]*)"', html) + title = description + " [" + id + "]" + url = r1(r'<meta property="og:video" content="([^"]*)"', html) + type, ext, size = url_info(url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge = merge) + +site_info = "Instagram" +download = instagram_download +download_playlist = playlist_not_supported('instagram') From 1240e5290656c60ecbbadda7ca63983db01b271c Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 20 Jun 2013 21:16:13 +0200 Subject: [PATCH 14/64] version 0.3.15 --- CHANGELOG.txt | 8 ++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index ac4e1009..842b9a80 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,14 @@ Changelog ========= +0.3.15 +------ + +*Date: 2013-06-21* + +* Add support for: + - Instagram + 0.3.14 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index d8b68a52..b940c1fc 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] -__version__ = '0.3.14' -__date__ = '2013-06-14' +__version__ = '0.3.15' +__date__ = '2013-06-21' From abb3c938c189c90d5a526b095ceb4a704c9e88e3 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 26 Jun 2013 14:01:11 +0800 Subject: [PATCH 15/64] use HTTPS proxy --- src/you_get/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 54d082af..de52ef6b 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -597,9 +597,7 @@ def set_http_proxy(proxy): elif proxy == '': # Don't use any proxy proxy_support = request.ProxyHandler({}) else: # Use proxy - if not proxy.startswith('http://'): - proxy = 'http://' + proxy - proxy_support = request.ProxyHandler({'http': '%s' % proxy}) + proxy_support = request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy}) opener = request.build_opener(proxy_support) request.install_opener(opener) From 8e2165af475ec3682cb3aea7194c6cde59aee76e Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 26 Jun 2013 16:24:30 +0800 Subject: [PATCH 16/64] Instagram: update site_info --- src/you_get/downloader/instagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/instagram.py b/src/you_get/downloader/instagram.py index f73e23fe..6071dfd0 100644 --- a/src/you_get/downloader/instagram.py +++ b/src/you_get/downloader/instagram.py @@ -17,6 +17,6 @@ def instagram_download(url, output_dir = '.', merge = True, info_only = False): if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) -site_info = "Instagram" +site_info = "Instagram.com" download = instagram_download download_playlist = playlist_not_supported('instagram') From 6c1be0f93607853e50b0d5a1a8844f44be8804ca Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 27 Jun 2013 00:50:25 +0800 Subject: [PATCH 17/64] YouTube: decrypt ciphered signature, temporarily fix #203 --- src/you_get/downloader/youtube.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index 6ec39ccf..69285b6a 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -78,13 +78,24 @@ def parse_video_info(raw_info): for item in raw_info.split('&')]) +# Imported from youtube-dl +def _decrypt_signature(s): + """Decrypt the key the two subkeys must have a length of 43""" + (a, b) = s.split('.') + if len(a) != 43 or len(b) != 43: + raise Exception('Unable to decrypt signature, subkeys lengths not valid') + b = ''.join([b[:8], a[0], b[9:18], b[-4], b[19:39], b[18]])[0:40] + a = a[-40:] + s_dec = '.'.join((a, b))[::-1] + return s_dec + def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8') video_info = parse_video_info(raw_info) - if video_info['status'] == 'ok': # use get_video_info data + if video_info['status'] == 'ok' and not video_info['use_cipher_signature'] == 'True': # use get_video_info data title = parse.unquote(video_info['title'].replace('+', ' ')) @@ -118,7 +129,7 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf url = r1(r'url=([^\\]+)', fmt) url = unicodize(url) url = parse.unquote(url) - sig = r1(r'sig=([^\\]+)', fmt) + sig = r1(r'sig=([^\\]+)', fmt) or _decrypt_signature(r1(r's=([^\\]+)', fmt)) url = url + '&signature=' + sig break try: From d2128789e5fda4a916e865f29692721a4e4f5205 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 27 Jun 2013 09:13:46 +0800 Subject: [PATCH 18/64] Sohu: fix #204 --- src/you_get/downloader/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/sohu.py b/src/you_get/downloader/sohu.py index 5e6df793..4400836a 100644 --- a/src/you_get/downloader/sohu.py +++ b/src/you_get/downloader/sohu.py @@ -8,7 +8,7 @@ import json def real_url(host, prot, file, new): url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new) - start, _, host, key, _, _ = get_html(url).split('|') + start, _, host, key = get_html(url).split('|')[:4] return '%s%s?key=%s' % (start[:-1], new, key) def sohu_download(url, output_dir = '.', merge = True, info_only = False): From da66e127b8afddbf5cbe0978a9bf39884f2c29bc Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 27 Jun 2013 23:38:23 +0800 Subject: [PATCH 19/64] YouTube: fix #203, using decryption algorithm from youtube-dl --- src/you_get/downloader/youtube.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index 69285b6a..557eca1f 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -78,16 +78,24 @@ def parse_video_info(raw_info): for item in raw_info.split('&')]) -# Imported from youtube-dl -def _decrypt_signature(s): - """Decrypt the key the two subkeys must have a length of 43""" - (a, b) = s.split('.') - if len(a) != 43 or len(b) != 43: - raise Exception('Unable to decrypt signature, subkeys lengths not valid') - b = ''.join([b[:8], a[0], b[9:18], b[-4], b[19:39], b[18]])[0:40] - a = a[-40:] - s_dec = '.'.join((a, b))[::-1] - return s_dec +# Signature decryption algorithm, reused code from youtube-dl +def decrypt_signature(s): + if len(s) == 88: + return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] + elif len(s) == 87: + return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1] + elif len(s) == 86: + return s[2:63] + s[82] + s[64:82] + s[63] + elif len(s) == 85: + return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1] + elif len(s) == 84: + return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] + elif len(s) == 83: + return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36] + elif len(s) == 82: + return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] + else: + raise Exception(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): @@ -129,7 +137,7 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf url = r1(r'url=([^\\]+)', fmt) url = unicodize(url) url = parse.unquote(url) - sig = r1(r'sig=([^\\]+)', fmt) or _decrypt_signature(r1(r's=([^\\]+)', fmt)) + sig = r1(r'sig=([^\\]+)', fmt) or decrypt_signature(r1(r's=([^\\]+)', fmt)) url = url + '&signature=' + sig break try: From 9854825ee769cb03c5dc0362630fca825ed57f36 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 27 Jun 2013 23:50:53 +0800 Subject: [PATCH 20/64] fix unicode literal for Python 3.2 --- src/you_get/downloader/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index 557eca1f..3cfa3e76 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -95,7 +95,7 @@ def decrypt_signature(s): elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] else: - raise Exception(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) + raise Exception('Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): From 65d0697bbfa39989f169063b82f5b527c3710dd2 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 27 Jun 2013 23:56:15 +0800 Subject: [PATCH 21/64] version 0.3.16 --- CHANGELOG.txt | 10 ++++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 842b9a80..685b2d46 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,16 @@ Changelog ========= +0.3.16 +------ + +*Date: 2013-06-28* + +* Fix issues for: + - YouTube + - Sohu + - Google+ (enable HTTPS proxy) + 0.3.15 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index b940c1fc..1e21293d 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] -__version__ = '0.3.15' -__date__ = '2013-06-21' +__version__ = '0.3.16' +__date__ = '2013-06-28' From ecdc6193f7e19518fefcb11631fa70748d74017e Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Mon, 1 Jul 2013 22:39:39 +0800 Subject: [PATCH 22/64] bilibili: fix #185 --- src/you_get/downloader/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/bilibili.py b/src/you_get/downloader/bilibili.py index 20e3c467..b74ce408 100644 --- a/src/you_get/downloader/bilibili.py +++ b/src/you_get/downloader/bilibili.py @@ -64,7 +64,7 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_onl elif re.search(r'/mp4/', urls[0]): type = 'mp4' else: - raise NotImplementedError(urls[0]) + type = 'flv' size = 0 for url in urls: From 39a30b0396d3a45d382dc214ca48ff045cc655cd Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 5 Jul 2013 15:00:43 +0800 Subject: [PATCH 23/64] 163: fix #205 --- src/you_get/downloader/netease.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/you_get/downloader/netease.py b/src/you_get/downloader/netease.py index 964c192d..863689f3 100644 --- a/src/you_get/downloader/netease.py +++ b/src/you_get/downloader/netease.py @@ -7,10 +7,13 @@ from ..common import * def netease_download(url, output_dir = '.', merge = True, info_only = False): html = get_decoded_html(url) - src = r1(r'<source src="([^"]+)"', html) - title = r1('movieDescription=\'([^\']+)\'', html) + title = r1('movieDescription=\'([^\']+)\'', html) or r1('<title>(.+)', html) + if title[0] == ' ': + title = title[1:] - if title: + src = r1(r'(.+)', html) - if title[0] == ' ': - title = title[1:] - url = r1(r'(.+)-list.m3u8', src) + ".mp4" + url = r1(r'["\'](.+)-list.m3u8["\']', html) + ".mp4" _, _, size = url_info(url) ext = 'mp4' From 101215addae7904efce50de0b5d54c493f18edcf Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 11 Jul 2013 00:40:18 +0800 Subject: [PATCH 24/64] YouTube: new algo for len 83, fix #206 --- src/you_get/downloader/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index 3cfa3e76..cc9d45a3 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -91,7 +91,7 @@ def decrypt_signature(s): elif len(s) == 84: return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] elif len(s) == 83: - return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36] + return s[:81] elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] else: From fdc9d81c866b3d8e330cd3e30893041729c16029 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 11 Jul 2013 11:11:45 +0800 Subject: [PATCH 25/64] remove test of JPopsuki --- tests/test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test.py b/tests/test.py index 75f6f7ac..2a25779b 100644 --- a/tests/test.py +++ b/tests/test.py @@ -17,11 +17,6 @@ class YouGetTests(unittest.TestCase): "http://www.freesound.org/people/Corsica_S/sounds/184419/", ]) - def test_jpopsuki(self): - test_urls([ - #"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17", - ]) - def test_mixcloud(self): test_urls([ "http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/", From 72df45bc8e07f812135c8932e06480adee19eafd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 11 Jul 2013 16:48:13 +0800 Subject: [PATCH 26/64] refactor, code cleanup for YouTube --- src/you_get/common.py | 130 ++++++++++++++++++++++++++-- src/you_get/downloader/youtube.py | 139 +++++++++--------------------- 2 files changed, 163 insertions(+), 106 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index de52ef6b..5b7e02e6 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -7,6 +7,7 @@ import os import re import sys from urllib import request, parse +import platform from .version import __version__ @@ -33,20 +34,63 @@ def tr(s): except: return str(s.encode('utf-8'))[2:-1] +# DEPRECATED in favor of match1() def r1(pattern, text): m = re.search(pattern, text) if m: return m.group(1) +# DEPRECATED in favor of match1() def r1_of(patterns, text): for p in patterns: x = r1(p, text) if x: return x +def match1(text, *patterns): + """Scans through a string for substrings matched some patterns (first-subgroups only). + + Args: + text: A string to be scanned. + patterns: Arbitrary number of regex patterns. + + Returns: + When only one pattern is given, returns a string (None if no match found). + When more than one pattern are given, returns a list of strings ([] if no match found). + """ + + if len(patterns) == 1: + pattern = patterns[0] + match = re.search(pattern, text) + if match: + return match.group(1) + else: + return None + else: + ret = [] + for pattern in patterns: + match = re.search(pattern, text) + if match: + ret.append(match.group(1)) + return ret + +def parse_query_param(url, param): + """Parses the query string of a URL and returns the value of a parameter. + + Args: + url: A URL. + param: A string representing the name of the parameter. + + Returns: + The value of the parameter. + """ + + return parse.parse_qs(parse.urlparse(url).query)[param][0] + def unicodize(text): return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text) +# DEPRECATED in favor of filenameable() def escape_file_path(path): path = path.replace('/', '-') path = path.replace('\\', '-') @@ -54,23 +98,57 @@ def escape_file_path(path): path = path.replace('?', '-') return path +def filenameable(text): + """Converts a string to a legal filename through various OSes. + """ + # All POSIX systems + text = text.translate({ + 0: None, + ord('/'): '-', + }) + if platform.system() == 'Darwin': # For Mac OS + text = text.translate({ + ord(':'): '-', + }) + elif platform.system() == 'Windows': # For Windows + text = text.translate({ + ord(':'): '-', + ord('*'): '-', + ord('?'): '-', + ord('\\'): '-', + ord('\"'): '\'', + ord('<'): '-', + ord('>'): '-', + ord('|'): '-', + ord('+'): '-', + ord('['): '(', + ord(']'): ')', + }) + return text + def unescape_html(html): from html import parser html = parser.HTMLParser().unescape(html) html = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), html) return html -def ungzip(s): +def ungzip(data): + """Decompresses data for Content-Encoding: gzip. + """ from io import BytesIO import gzip - buffer = BytesIO(s) - f = gzip.GzipFile(fileobj = buffer) + buffer = BytesIO(data) + f = gzip.GzipFile(fileobj=buffer) return f.read() -def undeflate(s): +def undeflate(data): + """Decompresses data for Content-Encoding: deflate. + (the zlib compression is used.) + """ import zlib - return zlib.decompress(s, -zlib.MAX_WBITS) + return zlib.decompress(data, -zlib.MAX_WBITS) +# DEPRECATED in favor of get_content() def get_response(url, faker = False): if faker: response = request.urlopen(request.Request(url, headers = fake_headers), None) @@ -85,10 +163,12 @@ def get_response(url, faker = False): response.data = data return response +# DEPRECATED in favor of get_content() def get_html(url, encoding = None, faker = False): content = get_response(url, faker).data return str(content, 'utf-8', 'ignore') +# DEPRECATED in favor of get_content() def get_decoded_html(url, faker = False): response = get_response(url, faker) data = response.data @@ -98,6 +178,38 @@ def get_decoded_html(url, faker = False): else: return data +def get_content(url, headers={}, decoded=True): + """Gets the content of a URL via sending a HTTP GET request. + + Args: + url: A URL. + headers: Request headers used by the client. + decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type. + + Returns: + The content as a string. + """ + + response = request.urlopen(request.Request(url, headers=headers)) + data = response.read() + + # Handle HTTP compression for gzip and deflate (zlib) + content_encoding = response.getheader('Content-Encoding') + if content_encoding == 'gzip': + data = ungzip(data) + elif content_encoding == 'deflate': + data = undeflate(data) + + # Decode the response body + if decoded: + charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)') + if charset is not None: + data = data.decode(charset) + else: + data = data.decode('utf-8') + + return data + def url_size(url, faker = False): if faker: response = request.urlopen(request.Request(url, headers = fake_headers), None) @@ -388,7 +500,9 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, import sys traceback.print_exc(file = sys.stdout) pass - title = escape_file_path(title) + + title = filenameable(title) + filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) if total_size: @@ -463,7 +577,9 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer return assert ext in ('ts') - title = escape_file_path(title) + + title = filenameable(title) + filename = '%s.%s' % (title, 'ts') filepath = os.path.join(output_dir, filename) if total_size: diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index cc9d45a3..3e85ee21 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -6,7 +6,7 @@ from ..common import * # YouTube media encoding options, in descending quality order. # taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013. -youtube_codecs = [ +yt_codecs = [ {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, @@ -32,52 +32,6 @@ youtube_codecs = [ {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'}, ] -def parse_video_info(raw_info): - """Parser for YouTube's get_video_info data. - Returns a dict, where 'url_encoded_fmt_stream_map' maps to a sorted list. - """ - - # Percent-encoding reserved characters, used as separators. - sepr = { - '&': '%26', - ',': '%2C', - '=': '%3D', - } - - # fmt_level = {'itag': level, ...} - # itag of a higher quality maps to a lower level number. - # The highest quality has level number 0. - fmt_level = dict( - zip( - [str(codec['itag']) - for codec in - youtube_codecs], - range(len(youtube_codecs)))) - - # {key1: value1, key2: value2, ..., - # 'url_encoded_fmt_stream_map': [{'itag': '38', ...}, ...] - # } - return dict( - [(lambda metadata: - ['url_encoded_fmt_stream_map', ( - lambda stream_map: - sorted( - [dict( - [subitem.split(sepr['=']) - for subitem in - item.split(sepr['&'])]) - for item in - stream_map.split(sepr[','])], - key = - lambda stream: - fmt_level[stream['itag']])) - (metadata[1])] - if metadata[0] == 'url_encoded_fmt_stream_map' - else metadata) - (item.split('=')) - for item in - raw_info.split('&')]) - # Signature decryption algorithm, reused code from youtube-dl def decrypt_signature(s): if len(s) == 88: @@ -97,56 +51,42 @@ def decrypt_signature(s): else: raise Exception('Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) -def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): +def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False): + """Downloads a YouTube video by its unique id. + """ - raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8') + raw_video_info = get_content('http://www.youtube.com/get_video_info?video_id=%s' % id) + video_info = parse.parse_qs(raw_video_info) - video_info = parse_video_info(raw_info) + if video_info['status'] == ['ok'] and ('use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']): + title = parse.unquote_plus(video_info['title'][0]) + stream_list = parse.parse_qs(raw_video_info)['url_encoded_fmt_stream_map'][0].split(',') + + else: + # Parse video page when video_info is not usable. + video_page = get_content('http://www.youtube.com/watch?v=%s' % id) + ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+);')) + + title = ytplayer_config['args']['title'] + stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') - if video_info['status'] == 'ok' and not video_info['use_cipher_signature'] == 'True': # use get_video_info data - - title = parse.unquote(video_info['title'].replace('+', ' ')) - - signature = video_info['url_encoded_fmt_stream_map'][0]['sig'] - url = parse.unquote(parse.unquote(video_info['url_encoded_fmt_stream_map'][0]['url'])) + "&signature=%s" % signature - - else: # parse video page when "embedding disabled by request" - - import json - html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8') - html = unescape_html(html) - yt_player_config = json.loads(r1(r'ytplayer.config = ([^\n]+);', html)) - title = yt_player_config['args']['title'] - title = unicodize(title) - title = parse.unquote(title) - title = escape_file_path(title) - - for itag in [ - '38', - '46', '37', - '102', '45', '22', - '84', - '120', - '85', - '44', '35', - '101', '100', '43', '34', '82', '18', - '6', '83', '13', '5', '36', '17', - ]: - fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html) - if fmt: - url = r1(r'url=([^\\]+)', fmt) - url = unicodize(url) - url = parse.unquote(url) - sig = r1(r'sig=([^\\]+)', fmt) or decrypt_signature(r1(r's=([^\\]+)', fmt)) - url = url + '&signature=' + sig - break - try: - url - except NameError: - url = r1(r'ytdns.ping\("([^"]+)"[^;]*;', html) - url = unicodize(url) - url = re.sub(r'\\/', '/', url) - url = re.sub(r'generate_204', 'videoplayback', url) + streams = { + parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream) + for stream in stream_list + } + + for codec in yt_codecs: + itag = str(codec['itag']) + if itag in streams: + download_stream = streams[itag] + break + + url = download_stream['url'][0] + if 'sig' in download_stream: + sig = download_stream['sig'][0] + else: + sig = decrypt_signature(download_stream['s'][0]) + url = '%s&signature=%s' % (url, sig) type, ext, size = url_info(url) @@ -154,13 +94,14 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) -def youtube_download(url, output_dir = '.', merge = True, info_only = False): - id = r1(r'youtu.be/(.*)', url) - if not id: - id = parse.parse_qs(parse.urlparse(url).query)['v'][0] +def youtube_download(url, output_dir='.', merge=True, info_only=False): + """Downloads YouTube videos by URL. + """ + + id = match1(url, r'youtu.be/([^/]+)') or parse_query_param(url, 'v') assert id - youtube_download_by_id(id, None, output_dir, merge = merge, info_only = info_only) + youtube_download_by_id(id, title=None, output_dir=output_dir, merge=merge, info_only=info_only) site_info = "YouTube.com" download = youtube_download From c4db743bdb9bbfef2e39cc57e2be290d9f51bc10 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 11 Jul 2013 16:48:28 +0800 Subject: [PATCH 27/64] add tests.test_common --- tests/test_common.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/test_common.py diff --git a/tests/test_common.py b/tests/test_common.py new file mode 100644 index 00000000..5e97b77b --- /dev/null +++ b/tests/test_common.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +import unittest + +from you_get import * + +class TestCommon(unittest.TestCase): + + def test_match1(self): + self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A') + self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be']) From f87b5261a7092b6a2d248aaaa3168aeb1983c19c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 11 Jul 2013 16:57:43 +0800 Subject: [PATCH 28/64] version 0.3.17 --- CHANGELOG.txt | 11 +++++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 685b2d46..615d7e7d 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,17 @@ Changelog ========= +0.3.17 +------ + +*Date: 2013-07-12* + +* Fix issues for: + - YouTube + - 163 + - bilibili +* Code cleanup. + 0.3.16 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 1e21293d..9416d79f 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] -__version__ = '0.3.16' -__date__ = '2013-06-28' +__version__ = '0.3.17' +__date__ = '2013-07-12' From 952e310264c7ffa05215e4ccddc9aa495fdd1640 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 14 Jul 2013 23:34:42 +0800 Subject: [PATCH 29/64] Sina: fix #207 for video.sina.com --- src/you_get/downloader/acfun.py | 2 +- src/you_get/downloader/bilibili.py | 2 +- src/you_get/downloader/sina.py | 51 +++++++++++++++++++++++------- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/you_get/downloader/acfun.py b/src/you_get/downloader/acfun.py index 684fec2f..7d49c85e 100644 --- a/src/you_get/downloader/acfun.py +++ b/src/you_get/downloader/acfun.py @@ -5,7 +5,7 @@ __all__ = ['acfun_download'] from ..common import * from .qq import qq_download_by_id -from .sina import sina_download_by_id +from .sina import sina_download_by_vid from .tudou import tudou_download_by_iid from .youku import youku_download_by_id diff --git a/src/you_get/downloader/bilibili.py b/src/you_get/downloader/bilibili.py index b74ce408..7fdce98c 100644 --- a/src/you_get/downloader/bilibili.py +++ b/src/you_get/downloader/bilibili.py @@ -4,7 +4,7 @@ __all__ = ['bilibili_download'] from ..common import * -from .sina import sina_download_by_id +from .sina import sina_download_by_vid from .tudou import tudou_download_by_id from .youku import youku_download_by_id diff --git a/src/you_get/downloader/sina.py b/src/you_get/downloader/sina.py index b533d8dc..af030a9e 100644 --- a/src/you_get/downloader/sina.py +++ b/src/you_get/downloader/sina.py @@ -1,20 +1,22 @@ #!/usr/bin/env python -__all__ = ['sina_download', 'sina_download_by_id'] +__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] from ..common import * -import re - def video_info(id): - xml = get_decoded_html('http://v.iask.com/v_play.php?vid=%s' % id) + xml = get_content('http://v.iask.com/v_play.php?vid=%s' % id, decoded=True) urls = re.findall(r'(?:)?', xml) - name = r1(r'(?:)?', xml) - vstr = r1(r'(?:)?', xml) + name = match1(xml, r'(?:)?') + vstr = match1(xml, r'(?:)?') return urls, name, vstr -def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - urls, name, vstr = video_info(id) +def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): + """Downloads a Sina video by its unique vid. + http://video.sina.com.cn/ + """ + + urls, name, vstr = video_info(vid) title = title or name assert title size = 0 @@ -26,11 +28,36 @@ def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_o if not info_only: download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) -def sina_download(url, output_dir = '.', merge = True, info_only = False): - id = r1(r'vid=(\d+)', url) or r1(r'[^_]vid\s*:\s*\'([^\']+)\',', get_html(url)).split('|')[-1] - assert id +def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False): + """Downloads a Sina video by its unique vkey. + http://video.sina.com/ + """ - sina_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only) + url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey + type, ext, size = url_info(url) + + print_info(site_info, title, 'flv', size) + if not info_only: + download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) + +def sina_download(url, output_dir='.', merge=True, info_only=False): + """Downloads Sina videos by URL. + """ + + vid = match1(url, r'vid=(\d+)') + if vid is None: + video_page = get_content(url) + vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'') + if hd_vid == '0': + vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|') + vid = vids[-1] + + if vid: + sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) + else: + vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"') + title = match1(video_page, r'title\s*:\s*"([^"]+)"') + sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only) site_info = "Sina.com" download = sina_download From 609d6e70d79c0b7b2ca247467da480927ca9e6e6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 16 Jul 2013 10:58:06 +0800 Subject: [PATCH 30/64] Youku: fix #208 --- src/you_get/downloader/youku.py | 49 +++++++++++++++++---------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/src/you_get/downloader/youku.py b/src/you_get/downloader/youku.py index 1603394d..ee9731d0 100644 --- a/src/you_get/downloader/youku.py +++ b/src/you_get/downloader/youku.py @@ -25,7 +25,7 @@ def find_video_id_from_url(url): return r1_of(patterns, url) def find_video_id_from_show_page(url): - return re.search(r'
.*href="([^"]+)"', get_html(url)).group(1) + return re.search(r'([^<>]*)', page).group(1) else: title = re.search(r'(\d+)', get_html(url)).group(1)) return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)] -def youku_download_playlist(url, output_dir = '.', merge = True, info_only = False): - if re.match(r'http://www.youku.com/show_page/id_\w+.html', url): - url = find_video_id_from_show_page(url) +def youku_download_playlist(url, output_dir='.', merge=True, info_only=False): + """Downloads a Youku playlist. + """ if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url): ids = parse_vplaylist(url) @@ -185,21 +173,36 @@ def youku_download_playlist(url, output_dir = '.', merge = True, info_only = Fal ids = parse_vplaylist(url) elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url): ids = parse_vplaylist(url) - else: + elif re.match(r'http://www.youku.com/show_page/id_\w+.html', url): + url = find_video_id_from_show_page(url) assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist' ids = parse_playlist(url) + else: + ids = [] + assert ids != [] title = parse_playlist_title(url, get_html(url)) - title = title.replace('?', '-') + title = filenameable(title) output_dir = os.path.join(output_dir, title) for i, id in enumerate(ids): + print('Processing %s of %s videos...' % (i + 1, len(ids))) try: - print('Processing %s of %s videos...' % (i + 1, len(ids))) - youku_download(id, output_dir, merge = merge, info_only = info_only) + id, title = parse_page(youku_url(id)) + youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) except: continue +def youku_download(url, output_dir='.', merge=True, info_only=False): + """Downloads Youku videos by URL. + """ + + try: + youku_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) + except: + id, title = parse_page(url) + youku_download_by_id(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + site_info = "Youku.com" download = youku_download download_playlist = youku_download_playlist From 02e1d3fe81b532eada39101465de1e4ab0542001 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 16 Jul 2013 22:57:23 +0800 Subject: [PATCH 31/64] bilibili: fix #209 --- src/you_get/downloader/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/bilibili.py b/src/you_get/downloader/bilibili.py index 7fdce98c..8512d362 100644 --- a/src/you_get/downloader/bilibili.py +++ b/src/you_get/downloader/bilibili.py @@ -83,7 +83,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars t, id = flashvars.split('=', 1) id = id.split('&')[0] From 82671676ed8be15a5368f786dc94517e726bbb2a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 17 Jul 2013 12:54:58 +0800 Subject: [PATCH 32/64] AcFun: use sina_download_by_vid() --- src/you_get/downloader/acfun.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/downloader/acfun.py b/src/you_get/downloader/acfun.py index 7d49c85e..88e1a7d0 100644 --- a/src/you_get/downloader/acfun.py +++ b/src/you_get/downloader/acfun.py @@ -20,7 +20,7 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ t = info['vtype'] vid = info['vid'] if t == 'sina': - sina_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only) + sina_download_by_vid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'youku': youku_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'tudou': @@ -49,7 +49,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False): id = r1(r"\[Video\](\d+)\[/Video\]", html) or r1(r"\[video\](\d+)\[/video\]", html) if not id: id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html) - sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) else: acfun_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) From 9a87f83c7060b66f7f95f2823db11b5e86a4fd67 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 19 Jul 2013 00:14:05 +0800 Subject: [PATCH 33/64] Dailymotion: fix #210 --- src/you_get/downloader/dailymotion.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/you_get/downloader/dailymotion.py b/src/you_get/downloader/dailymotion.py index 5d42cbb9..99d586c8 100644 --- a/src/you_get/downloader/dailymotion.py +++ b/src/you_get/downloader/dailymotion.py @@ -5,16 +5,22 @@ __all__ = ['dailymotion_download'] from ..common import * def dailymotion_download(url, output_dir = '.', merge = True, info_only = False): - html = get_html(url) - html = parse.unquote(html).replace('\/', '/') + """Downloads Dailymotion videos by URL. + """ - title = r1(r'meta property="og:title" content="([^"]+)"', html) - title = escape_file_path(title) + id = match1(url, r'/video/([^\?]+)') + embed_url = 'http://www.dailymotion.com/embed/video/%s' % id + html = get_content(embed_url) - for quality in ['hd720URL', 'hqURL', 'sdURL']: - real_url = r1(r',\"' + quality + '\"\:\"([^\"]+?)\",', html) + info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n')) + + title = info['title'] + + for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']: + real_url = info[quality] if real_url: break + type, ext, size = url_info(real_url) print_info(site_info, title, type, size) From e8b81941503d96dba5d77b2c8f0d875433c30d74 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 19 Jul 2013 08:25:20 +0800 Subject: [PATCH 34/64] version 0.3.18 --- CHANGELOG.txt | 12 ++++++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 615d7e7d..d3c75e3e 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,18 @@ Changelog ========= +0.3.18 +------ + +*Date: 2013-07-19* + +* Fix issues for: + - Dailymotion + - Youku + - Sina + - AcFun + - bilibili + 0.3.17 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 9416d79f..5f6a824a 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] -__version__ = '0.3.17' -__date__ = '2013-07-12' +__version__ = '0.3.18' +__date__ = '2013-07-19' From fc3d932244f8a0e3fe9093c36c254b13f515982e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 19 Jul 2013 09:33:33 +0800 Subject: [PATCH 35/64] show git hash in version --- src/you_get/common.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 5b7e02e6..4a17e05c 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -729,8 +729,18 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge else: download(url, output_dir = output_dir, merge = merge, info_only = info_only) +def get_version(): + try: + import subprocess + real_dir = os.path.dirname(os.path.realpath(__file__)) + git_hash = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], cwd=real_dir, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout.read().decode('utf-8').strip() + assert git_hash + return '%s-%s' % (__version__, git_hash) + except: + return __version__ + def script_main(script_name, download, download_playlist = None): - version = 'You-Get %s, a video downloader.' % __version__ + version = 'You-Get %s, a video downloader.' % get_version() help = 'Usage: %s [OPTION]... [URL]...\n' % script_name help += '''\nStartup options: -V | --version Display the version and exit. From 2159e9f11cb16f37e0a987e16e988427acef7cb6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 23 Jul 2013 09:38:51 +0800 Subject: [PATCH 36/64] Xiami: fix default MIME type (audio/mpeg) --- src/you_get/downloader/xiami.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index a7b06101..bdb480f8 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -52,8 +52,8 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False): url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue type, ext, size = url_info(url, faker = True) - if not ext: - ext = 'mp3' + if not type: + type = 'mp3' print_info(site_info, song_title, type, size) if not info_only: From ade1252a06bca7f00c8e27756564371306a222fd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 23 Jul 2013 09:41:41 +0800 Subject: [PATCH 37/64] Xiami: fix default MIME type (audio/mpeg) --- .gitignore | 1 + src/you_get/downloader/xiami.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 63b93fe6..0a5d13ab 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ _*/ *.3gp *.asf *.flv +*.lrc *.mkv *.mp3 *.mp4 diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index bdb480f8..1c1a65a6 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -52,10 +52,10 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False): url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue type, ext, size = url_info(url, faker = True) - if not type: - type = 'mp3' + if not ext: + ext = 'mp3' - print_info(site_info, song_title, type, size) + print_info(site_info, song_title, ext, size) if not info_only: file_name = "%s - %s - %s" % (song_title, album_name, artist) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) From 568d40ed322f85e19baab6e7ac383a506d9740be Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 23 Jul 2013 09:45:07 +0800 Subject: [PATCH 38/64] Xiami: support URL pattern http://www.xiami.com/song/detail/id/.* --- src/you_get/downloader/xiami.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index 1c1a65a6..d0f7cb81 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -130,6 +130,10 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info if re.match('http://www.xiami.com/song/\d+', url): id = r1(r'http://www.xiami.com/song/(\d+)', url) xiami_download_song(id, output_dir, merge, info_only) + + if re.match('http://www.xiami.com/song/detail/id/\d+', url): + id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url) + xiami_download_song(id, output_dir, merge, info_only) site_info = "Xiami.com" download = xiami_download From d1fa95c38be765a2e5929c031dd77bf10bfd4dda Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 26 Jul 2013 10:36:04 +0800 Subject: [PATCH 39/64] refactor: you_get.__main__ -> you_get.downloader.__main__ --- src/you_get/__init__.py | 12 ++++++------ src/you_get/downloader/__init__.py | 1 + src/you_get/{ => downloader}/__main__.py | 5 ++--- src/you_get/version.py | 5 ++--- you-get | 9 +++++---- you-get.json | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) rename src/you_get/{ => downloader}/__main__.py (97%) diff --git a/src/you_get/__init__.py b/src/you_get/__init__.py index 0256fcfe..ecca35d2 100644 --- a/src/you_get/__init__.py +++ b/src/you_get/__init__.py @@ -1,9 +1,9 @@ #!/usr/bin/env python -from .processor import * - -from .downloader import * - -from .version import * from .common import * -from .__main__ import * +from .version import * + +# Easy import +#from .cli_wrapper.converter import * +#from .cli_wrapper.player import * +from .downloader import * diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 644b1bc6..a053ef43 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +from .__main__ import * from .acfun import * from .alive import * from .baidu import * diff --git a/src/you_get/__main__.py b/src/you_get/downloader/__main__.py similarity index 97% rename from src/you_get/__main__.py rename to src/you_get/downloader/__main__.py index 00ea834f..81e8bf00 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/downloader/__main__.py @@ -1,9 +1,8 @@ #!/usr/bin/env python - __all__ = ['main', 'any_download', 'any_download_playlist'] -from .downloader import * -from .common import * +from ..downloader import * +from ..common import * def url_to_module(url): site = r1(r'http://([^/]+)/', url) diff --git a/src/you_get/version.py b/src/you_get/version.py index 5f6a824a..b784683f 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,6 +1,5 @@ #!/usr/bin/env python - __all__ = ['__version__', '__date__'] -__version__ = '0.3.18' -__date__ = '2013-07-19' +__version__ = '0.4.0dev' +__date__ = '2013-07-26' diff --git a/you-get b/you-get index 8f88af13..86b44109 100755 --- a/you-get +++ b/you-get @@ -1,9 +1,10 @@ #!/usr/bin/env python3 import os, sys -sys.path.insert(0, os.path.join((os.path.dirname(os.path.realpath(__file__))), "src")) +__path__ = os.path.dirname(os.path.realpath(__file__)) +__srcdir__ = 'src' +sys.path.insert(1, os.path.join(__path__, __srcdir__)) +from you_get.downloader import main -from you_get import * - -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/you-get.json b/you-get.json index e7619b96..92114cff 100644 --- a/you-get.json +++ b/you-get.json @@ -31,6 +31,6 @@ ], "console_scripts": [ - "you-get = you_get.__main__:main" + "you-get = you_get.downloader.__main__:main" ] } From 60d0dd768b9a82e18ea2bd568f0b2eb2f1a74a07 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 26 Jul 2013 10:44:51 +0800 Subject: [PATCH 40/64] fix regression d1fa95c --- src/you_get/downloader/__init__.py | 3 ++- tests/test.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index a053ef43..ffb534e40 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -from .__main__ import * from .acfun import * from .alive import * from .baidu import * @@ -38,3 +37,5 @@ from .xiami import * from .yinyuetai import * from .youku import * from .youtube import * + +from .__main__ import * diff --git a/tests/test.py b/tests/test.py index 2a25779b..641878ef 100644 --- a/tests/test.py +++ b/tests/test.py @@ -4,7 +4,7 @@ import unittest from you_get import * -from you_get.__main__ import url_to_module +from you_get.downloader.__main__ import url_to_module def test_urls(urls): for url in urls: From fdccc0f13f4ad1dc807102e9ff0d6f51009c9c5a Mon Sep 17 00:00:00 2001 From: "Lee, Donggu" Date: Fri, 2 Aug 2013 16:25:16 +0700 Subject: [PATCH 41/64] Fixed Tudou --- src/you_get/downloader/tudou.py | 39 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/you_get/downloader/tudou.py b/src/you_get/downloader/tudou.py index b2b8bc1a..f28da697 100644 --- a/src/you_get/downloader/tudou.py +++ b/src/you_get/downloader/tudou.py @@ -5,26 +5,31 @@ __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', from ..common import * def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False): - xml = get_html('http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99') - + data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) + vids = [] + for k in data: + if len(data[k]) == 1: + vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]}) + + temp = max(vids, key=lambda x:x["size"]) + vid, size = temp["k"], temp["size"] + + xml = get_html('http://ct.v2.tudou.com/f?id=%s' % vid) from xml.dom.minidom import parseString doc = parseString(xml) - title = title or doc.firstChild.getAttribute('tt') or doc.firstChild.getAttribute('title') - urls = [(int(n.getAttribute('brt')), n.firstChild.nodeValue.strip()) for n in doc.getElementsByTagName('f')] - - url = max(urls, key = lambda x:x[0])[1] - assert 'f4v' in url - - type, ext, size = url_info(url) - - print_info(site_info, title, type, size) - if not info_only: - #url_save(url, filepath, bar): - download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge) + url = [n.firstChild.nodeValue.strip() for n in doc.getElementsByTagName('f')][0] -def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False): + ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', url) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([url], title, ext, size, output_dir = output_dir, merge = merge) + +def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False): html = get_html('http://www.tudou.com/programs/view/%s/' % id) - iid = r1(r'iid\s*[:=]\s*(\S+)', html) + + iid = r1(r'iid\s*[:=]\s*(\S+)', html) + title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html) tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) def tudou_download(url, output_dir = '.', merge = True, info_only = False): @@ -76,4 +81,4 @@ def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = Fal site_info = "Tudou.com" download = tudou_download -download_playlist = tudou_download_playlist +download_playlist = tudou_download_playlist \ No newline at end of file From 11301304f3e2cc461936f47ff1b9822ba5ec66fd Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Tue, 30 Jul 2013 21:09:37 +0800 Subject: [PATCH 42/64] Add support for 5sing --- src/you_get/downloader/__init__.py | 1 + src/you_get/downloader/__main__.py | 1 + src/you_get/downloader/fivesing.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 src/you_get/downloader/fivesing.py diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index ffb534e40..7a4e315d 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -10,6 +10,7 @@ from .coursera import * from .dailymotion import * from .douban import * from .facebook import * +from .fivesing import * from .freesound import * from .google import * from .ifeng import * diff --git a/src/you_get/downloader/__main__.py b/src/you_get/downloader/__main__.py index 81e8bf00..4ce9cbe5 100644 --- a/src/you_get/downloader/__main__.py +++ b/src/you_get/downloader/__main__.py @@ -19,6 +19,7 @@ def url_to_module(url): downloads = { '163': netease, '56': w56, + '5sing': fivesing, 'acfun': acfun, 'baidu': baidu, 'bilibili': bilibili, diff --git a/src/you_get/downloader/fivesing.py b/src/you_get/downloader/fivesing.py new file mode 100644 index 00000000..d9e4b4fd --- /dev/null +++ b/src/you_get/downloader/fivesing.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python + + __all__ = ['fivesing_download'] + +from ..common import * + +def fivesing_download(url, output_dir=".", merge=True, info_only=False): + html = get_html(url) + title = r1(r'var SongName = "(.*)";', html) + url = r1(r'file: "(\S*)"', html) + songtype, ext, size = url_info(url) + print_info(site_info, title, songtype, size) + if not info_only: + download_urls([url], title, ext, size, output_dir) + +site_info = "5sing.com" +download = fivesing_download +download_playlist = playlist_not_supported("5sing") From 82dfbd77ec00de89e4629ec3a9110a3f3093e039 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Aug 2013 18:32:30 +0800 Subject: [PATCH 43/64] remove trailing whitespace --- src/you_get/downloader/fivesing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/fivesing.py b/src/you_get/downloader/fivesing.py index d9e4b4fd..31d427c0 100644 --- a/src/you_get/downloader/fivesing.py +++ b/src/you_get/downloader/fivesing.py @@ -12,7 +12,7 @@ def fivesing_download(url, output_dir=".", merge=True, info_only=False): print_info(site_info, title, songtype, size) if not info_only: download_urls([url], title, ext, size, output_dir) - + site_info = "5sing.com" download = fivesing_download download_playlist = playlist_not_supported("5sing") From ef66fee099b9473c094d79725191865082f32028 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Aug 2013 18:34:09 +0800 Subject: [PATCH 44/64] remove trailing whitespace --- src/you_get/downloader/fivesing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/fivesing.py b/src/you_get/downloader/fivesing.py index 31d427c0..2d948af5 100644 --- a/src/you_get/downloader/fivesing.py +++ b/src/you_get/downloader/fivesing.py @@ -1,6 +1,6 @@ #!/usr/bin/env python - __all__ = ['fivesing_download'] +__all__ = ['fivesing_download'] from ..common import * From 2303bb5398cf2793b66d6441fcde0430936ac5ff Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Aug 2013 21:07:42 +0800 Subject: [PATCH 45/64] YouTube: new algo --- src/you_get/downloader/youtube.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index 3e85ee21..7eecd778 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -34,20 +34,28 @@ yt_codecs = [ # Signature decryption algorithm, reused code from youtube-dl def decrypt_signature(s): - if len(s) == 88: + if len(s) == 92: + return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] + elif len(s) == 90: + return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] + elif len(s) == 88: return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] elif len(s) == 87: - return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1] + return s[4:23] + s[86] + s[24:85] elif len(s) == 86: - return s[2:63] + s[82] + s[64:82] + s[63] + return s[83:85] + s[26] + s[79:46:-1] + s[85] + s[45:36:-1] + s[30] + s[35:30:-1] + s[46] + s[29:26:-1] + s[82] + s[25:1:-1] elif len(s) == 85: - return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1] + return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] elif len(s) == 84: return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] elif len(s) == 83: - return s[:81] + return s[:15] + s[80] + s[16:80] + s[15] elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] + elif len(s) == 81: + return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] + elif len(s) == 79: + return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] else: raise Exception('Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) From 0614a1a32382d2473c54e6f68ca668fe5a4a7126 Mon Sep 17 00:00:00 2001 From: "Lee, Donggu" Date: Sat, 3 Aug 2013 20:16:22 +0700 Subject: [PATCH 46/64] changed youku url --- src/you_get/downloader/youku.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/downloader/youku.py b/src/you_get/downloader/youku.py index ee9731d0..20c79c4d 100644 --- a/src/you_get/downloader/youku.py +++ b/src/you_get/downloader/youku.py @@ -80,8 +80,8 @@ def parse_page(url): return id2, title def get_info(videoId2): - return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2)) - + return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2 + '/timezone/+08/version/5/source/out/Sc/2')) + def find_video(info, stream_type = None): #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5) segs = info['data'][0]['segs'] From 82532aa504cb51fb69d97fbf62ba2adde2380ff6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 7 Aug 2013 14:00:00 +0800 Subject: [PATCH 47/64] prefer ffmpeg_concat_mp4_to_mp4 --- src/you_get/common.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 4a17e05c..8faf907a 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -551,19 +551,18 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, elif ext == 'mp4': try: - from .processor.join_mp4 import concat_mp4 - concat_mp4(parts, os.path.join(output_dir, title + '.mp4')) - for part in parts: - os.remove(part) - except: from .processor.ffmpeg import has_ffmpeg_installed if has_ffmpeg_installed(): from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4 ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4')) - for part in parts: - os.remove(part) else: - print('No ffmpeg is found. Merging aborted.') + from .processor.join_mp4 import concat_mp4 + concat_mp4(parts, os.path.join(output_dir, title + '.mp4')) + except: + raise + else: + for part in parts: + os.remove(part) else: print("Can't merge %s files" % ext) From 7aa2198adf50548e3f6d23cfddf2e65f296847df Mon Sep 17 00:00:00 2001 From: "Lee, Donggu" Date: Thu, 8 Aug 2013 12:04:34 +0700 Subject: [PATCH 48/64] added ehow.com --- src/you_get/downloader/__init__.py | 1 + src/you_get/downloader/__main__.py | 1 + src/you_get/downloader/ehow.py | 38 ++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 src/you_get/downloader/ehow.py diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 7a4e315d..0323d016 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -9,6 +9,7 @@ from .cntv import * from .coursera import * from .dailymotion import * from .douban import * +from .ehow import * from .facebook import * from .fivesing import * from .freesound import * diff --git a/src/you_get/downloader/__main__.py b/src/you_get/downloader/__main__.py index 4ce9cbe5..594825c5 100644 --- a/src/you_get/downloader/__main__.py +++ b/src/you_get/downloader/__main__.py @@ -28,6 +28,7 @@ def url_to_module(url): 'coursera': coursera, 'dailymotion': dailymotion, 'douban': douban, + 'ehow': ehow, 'facebook': facebook, 'freesound': freesound, 'google': google, diff --git a/src/you_get/downloader/ehow.py b/src/you_get/downloader/ehow.py new file mode 100644 index 00000000..adee6bfc --- /dev/null +++ b/src/you_get/downloader/ehow.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +__all__ = ['ehow_download'] + +from ..common import * + +def ehow_download(url, output_dir = '.', merge = True, info_only = False): + + assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported" + + html = get_html(url) + contentid = r1(r'', html) + vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) + assert vid + + xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) + + from xml.dom.minidom import parseString + doc = parseString(xml) + tab = doc.getElementsByTagName('related')[0].firstChild + + for video in tab.childNodes: + if re.search(contentid, video.attributes['link'].value): + url = video.attributes['flv'].value + break + + title = video.attributes['title'].value + assert title + + type, ext, size = url_info(url) + print_info(site_info, title, type, size) + + if not info_only: + download_urls([url], title, ext, size, output_dir, merge = merge) + +site_info = "ehow.com" +download = ehow_download +download_playlist = playlist_not_supported('ehow') \ No newline at end of file From e4eb92d2645b1738c8708e488df18624109e9c25 Mon Sep 17 00:00:00 2001 From: klb3713 Date: Thu, 8 Aug 2013 17:28:36 +0800 Subject: [PATCH 49/64] =?UTF-8?q?=E4=BF=AE=E6=AD=A3pptv=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E5=9C=B0=E5=9D=80=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/downloader/pptv.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/you_get/downloader/pptv.py b/src/you_get/downloader/pptv.py index 53c8e508..4fd88e5b 100644 --- a/src/you_get/downloader/pptv.py +++ b/src/you_get/downloader/pptv.py @@ -9,18 +9,14 @@ import urllib import hashlib def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - xml = get_html('http://web-play.pptv.com/webplay3-151-%s.xml' % id) + xml = get_html('http://web-play.pptv.com/webplay3-0-%s.xml?type=web.fpp' % id) host = r1(r'([^<>]+)', xml) - port = 8080 - st = r1(r'([^<>]+)', xml).encode('utf-8') - key = hashlib.md5(st).hexdigest() # FIXME: incorrect key - rids = re.findall(r'rid="([^"]+)"', xml) + key = r1(r']+>([^<>]+)', xml) rid = r1(r'rid="([^"]+)"', xml) title = r1(r'nm="([^"]+)"', xml) pieces = re.findall(' Date: Thu, 8 Aug 2013 20:18:54 +0400 Subject: [PATCH 50/64] Added Khan Academy support (videos downloaded from YouTube) --- src/you_get/downloader/__init__.py | 1 + src/you_get/downloader/__main__.py | 1 + src/you_get/downloader/khan.py | 15 +++++++++++++++ 3 files changed, 17 insertions(+) create mode 100755 src/you_get/downloader/khan.py diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 7a4e315d..adaafdf0 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -38,5 +38,6 @@ from .xiami import * from .yinyuetai import * from .youku import * from .youtube import * +from .khan import * from .__main__ import * diff --git a/src/you_get/downloader/__main__.py b/src/you_get/downloader/__main__.py index 4ce9cbe5..1fc96e79 100644 --- a/src/you_get/downloader/__main__.py +++ b/src/you_get/downloader/__main__.py @@ -59,6 +59,7 @@ def url_to_module(url): 'youku': youku, 'youtu': youtube, 'youtube': youtube, + 'khanacademy': khan, #TODO } if k in downloads: diff --git a/src/you_get/downloader/khan.py b/src/you_get/downloader/khan.py new file mode 100755 index 00000000..9c4bb40d --- /dev/null +++ b/src/you_get/downloader/khan.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +__all__ = ['khan_download'] + +from ..common import * +from .youtube import youtube_download_by_id + +def khan_download(url, output_dir = '.', merge = True, info_only = False): + page = get_html(url) + id = page[page.find('src="https://www.youtube.com/embed/') + len('src="https://www.youtube.com/embed/') :page.find('?enablejsapi=1&wmode=transparent&modestbranding=1&rel=0&fs=1&showinfo=0')] + youtube_download_by_id(id) + +site_info = "khanacademy.org" +download = khan_download +download_playlist = playlist_not_supported('khan') From f21479c0a382840ea0d7fba9e9d4c8eaa6965308 Mon Sep 17 00:00:00 2001 From: David Parunakian Date: Thu, 8 Aug 2013 20:27:54 +0400 Subject: [PATCH 51/64] Added TED.com video downloader Closes #56 --- src/you_get/downloader/__init__.py | 1 + src/you_get/downloader/__main__.py | 1 + src/you_get/downloader/ted.py | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 src/you_get/downloader/ted.py diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 7a4e315d..9be7e3d6 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -38,5 +38,6 @@ from .xiami import * from .yinyuetai import * from .youku import * from .youtube import * +from .ted import * from .__main__ import * diff --git a/src/you_get/downloader/__main__.py b/src/you_get/downloader/__main__.py index 4ce9cbe5..2f5cfbf0 100644 --- a/src/you_get/downloader/__main__.py +++ b/src/you_get/downloader/__main__.py @@ -49,6 +49,7 @@ def url_to_module(url): 'sohu': sohu, 'songtaste':songtaste, 'soundcloud': soundcloud, + 'ted': ted, 'tudou': tudou, 'tumblr': tumblr, 'vid48': vid48, diff --git a/src/you_get/downloader/ted.py b/src/you_get/downloader/ted.py new file mode 100644 index 00000000..167da2a8 --- /dev/null +++ b/src/you_get/downloader/ted.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +__all__ = ['ted_download'] + +from ..common import * + +def ted_download(url, output_dir = '.', merge = True, info_only = False): + page = get_html(url).split("\n") + for line in page: + if line.find("") > -1: + title = line.replace("<title>", "").replace("", "").replace("\t", "") + title = title[:title.find(' | ')] + if line.find("no-flash-video-download") > -1: + url = line.replace(' Date: Thu, 8 Aug 2013 21:51:52 +0400 Subject: [PATCH 52/64] Fixed a bug: no parameters were getting passed to the youtube_download_by_id() function --- src/you_get/downloader/khan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/khan.py b/src/you_get/downloader/khan.py index 9c4bb40d..617eec47 100755 --- a/src/you_get/downloader/khan.py +++ b/src/you_get/downloader/khan.py @@ -8,7 +8,7 @@ from .youtube import youtube_download_by_id def khan_download(url, output_dir = '.', merge = True, info_only = False): page = get_html(url) id = page[page.find('src="https://www.youtube.com/embed/') + len('src="https://www.youtube.com/embed/') :page.find('?enablejsapi=1&wmode=transparent&modestbranding=1&rel=0&fs=1&showinfo=0')] - youtube_download_by_id(id) + youtube_download_by_id(id, output_dir=output_dir, merge=merge, info_only=info_only) site_info = "khanacademy.org" download = khan_download From 3a4abe0bc0adf527618748042ff541ac047d6872 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 10 Aug 2013 16:37:51 +0800 Subject: [PATCH 53/64] YouTube: new algo --- src/you_get/downloader/youtube.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index 7eecd778..fc355833 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -38,16 +38,18 @@ def decrypt_signature(s): return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] elif len(s) == 90: return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] + elif len(s) == 89: + return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] elif len(s) == 88: return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] elif len(s) == 87: - return s[4:23] + s[86] + s[24:85] + return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53] elif len(s) == 86: - return s[83:85] + s[26] + s[79:46:-1] + s[85] + s[45:36:-1] + s[30] + s[35:30:-1] + s[46] + s[29:26:-1] + s[82] + s[25:1:-1] + return s[5:20] + s[2] + s[21:] elif len(s) == 85: return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] elif len(s) == 84: - return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] + return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] elif len(s) == 83: return s[:15] + s[80] + s[16:80] + s[15] elif len(s) == 82: From 11a13bd83a251b9a735ee6c0af112ac8f1b8128b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 10 Aug 2013 16:57:09 +0800 Subject: [PATCH 54/64] Xiami: do not raise exception if no lyric found --- src/you_get/downloader/xiami.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index d0f7cb81..fccc584b 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -59,7 +59,10 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False): if not info_only: file_name = "%s - %s - %s" % (song_title, album_name, artist) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) - xiami_download_lyric(lrc_url, file_name, output_dir) + try: + xiami_download_lyric(lrc_url, file_name, output_dir) + except: + pass def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = False): html = get_html('http://www.xiami.com/song/showcollect/id/' + cid, faker = True) @@ -84,7 +87,10 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = if not info_only: file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) - xiami_download_lyric(lrc_url, file_name, output_dir) + try: + xiami_download_lyric(lrc_url, file_name, output_dir) + except: + pass track_nr += 1 @@ -111,7 +117,10 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False) if not info_only: file_name = "%02d.%s" % (track_nr, song_title) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) - xiami_download_lyric(lrc_url, file_name, output_dir) + try: + xiami_download_lyric(lrc_url, file_name, output_dir) + except: + pass if not pic_exist: xiami_download_pic(pic_url, 'cover', output_dir) pic_exist = True From dceede85d310a43158e11db010de540178fca8f6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 15 Aug 2013 19:29:12 +0800 Subject: [PATCH 55/64] YouTube: new algo --- src/you_get/downloader/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index fc355833..bd655198 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -51,7 +51,7 @@ def decrypt_signature(s): elif len(s) == 84: return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] elif len(s) == 83: - return s[:15] + s[80] + s[16:80] + s[15] + return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] elif len(s) == 81: From 9df4d108afae8711f87affdccfa0e8567211e703 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 15 Aug 2013 21:51:22 +0800 Subject: [PATCH 56/64] version 0.3.20 --- CHANGELOG.txt | 13 +++++++++++++ README.md | 8 ++++++++ README.txt | 4 ++++ src/you_get/version.py | 4 ++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index d3c75e3e..d1ae3709 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,19 @@ Changelog ========= +0.3.20 +------ + +*Date: 2013-08-16* + +* Add support for: + - eHow + - Khan Academy + - TED + - 5sing +* Fix issues for: + - Tudou + 0.3.18 ------ diff --git a/README.md b/README.md index c8f6bf05..52d5d30c 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,12 @@ Fork me on GitHub: * Coursera * Blip * Dailymotion +* eHow * Facebook * Google+ * Google Drive +* Khan Academy +* TED * Tumblr * Vine * Instagram @@ -47,6 +50,7 @@ Fork me on GitHub: * Sohu (搜狐视频) * 56 (56网) * Xiami (虾米) +* 5sing * Baidu Music (百度音乐) * Baidu Wangpan (百度网盘) * SongTaste @@ -235,9 +239,12 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * Coursera * Blip * Dailymotion +* eHow * Facebook * Google+ * Google Drive +* Khan Academy +* TED * Tumblr * Vine * Instagram @@ -265,6 +272,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * 搜狐视频 * 56网 * 虾米 +* 5sing * 百度音乐 * 百度网盘 * SongTaste diff --git a/README.txt b/README.txt index c544103a..7bd8a1ee 100644 --- a/README.txt +++ b/README.txt @@ -20,9 +20,12 @@ Supported Sites (As of Now) * Coursera https://www.coursera.org * Blip http://blip.tv * Dailymotion http://dailymotion.com +* eHow http://www.ehow.com * Facebook http://facebook.com * Google+ http://plus.google.com * Google Drive http://docs.google.com +* Khan Academy http://www.khanacademy.org +* TED http://www.ted.com * Tumblr http://www.tumblr.com * Vine http://vine.co * Instagram http://instagram.com @@ -50,6 +53,7 @@ Supported Sites (As of Now) * Sohu (搜狐视频) http://tv.sohu.com * 56 (56网) http://www.56.com * Xiami (虾米) http://www.xiami.com +* 5sing http://www.5sing.com * Baidu Music (百度音乐) http://music.baidu.com * Baidu Wangpan (百度网盘) http://pan.baidu.com * SongTaste http://www.songtaste.com diff --git a/src/you_get/version.py b/src/you_get/version.py index b784683f..ee61648a 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,5 +1,5 @@ #!/usr/bin/env python __all__ = ['__version__', '__date__'] -__version__ = '0.4.0dev' -__date__ = '2013-07-26' +__version__ = '0.3.20' +__date__ = '2013-08-16' From b643a6c6a53446be8e55f521058e85555c2841c7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 16 Aug 2013 01:06:25 +0800 Subject: [PATCH 57/64] pan.baidu.com: fixed --- src/you_get/downloader/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/baidu.py b/src/you_get/downloader/baidu.py index 405e0b78..79d7053d 100755 --- a/src/you_get/downloader/baidu.py +++ b/src/you_get/downloader/baidu.py @@ -75,7 +75,7 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info if len(title.split('.')) > 1: title = ".".join(title.split('.')[:-1]) - real_url = r1(r'href="([^"]+)" id="downFileButtom"', html).replace('&', '&') + real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/') type, ext, size = url_info(real_url, faker = True) print_info(site_info, title, ext, size) From 66d82c9b5115e9853e119e7bf363dccfc96ecaf4 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 16 Aug 2013 17:38:06 +0800 Subject: [PATCH 58/64] YinYueTai: fixed --- src/you_get/downloader/yinyuetai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/yinyuetai.py b/src/you_get/downloader/yinyuetai.py index e243eb3f..2048aed9 100644 --- a/src/you_get/downloader/yinyuetai.py +++ b/src/you_get/downloader/yinyuetai.py @@ -23,7 +23,7 @@ def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False): id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url) assert id html = get_html(url, 'utf-8') - title = r1(r'', html) + title = r1(r'', html) assert title title = parse.unquote(title) title = escape_file_path(title) From 657c1e20c25973c8d638d06ddecb5300820903cb Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 17 Aug 2013 15:11:03 +0800 Subject: [PATCH 59/64] YouTube: bite me --- src/you_get/downloader/youtube.py | 57 +++++++++++++++---------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/downloader/youtube.py index bd655198..51fbb07f 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/downloader/youtube.py @@ -32,34 +32,30 @@ yt_codecs = [ {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'}, ] -# Signature decryption algorithm, reused code from youtube-dl -def decrypt_signature(s): - if len(s) == 92: - return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] - elif len(s) == 90: - return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] - elif len(s) == 89: - return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] - elif len(s) == 88: - return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] - elif len(s) == 87: - return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53] - elif len(s) == 86: - return s[5:20] + s[2] + s[21:] - elif len(s) == 85: - return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] - elif len(s) == 84: - return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] - elif len(s) == 83: - return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] - elif len(s) == 82: - return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] - elif len(s) == 81: - return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] - elif len(s) == 79: - return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] - else: - raise Exception('Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) +def decipher(js, s): + def tr_js(code): + code = re.sub(r'function', r'def', code) + code = re.sub(r'\{', r':\n\t', code) + code = re.sub(r'\}', r'\n', code) + code = re.sub(r'var\s+', r'', code) + code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code) + code = re.sub(r'(\w+).length', r'len(\1)', code) + code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code) + code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code) + code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code) + return code + + f1 = match1(js, r'g.sig\|\|(\w+)\(g.s\)') + f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1) + code = tr_js(f1def) + f2 = match1(f1def, r'(\w+)\(\w+,\d+\)') + if f2 is not None: + f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2) + code = code + 'global %s\n' % f2 + tr_js(f2def) + + code = code + 'sig=%s(s)' % f1 + exec(code, globals(), locals()) + return locals()['sig'] def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False): """Downloads a YouTube video by its unique id. @@ -79,6 +75,8 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only title = ytplayer_config['args']['title'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') + + html5player = ytplayer_config['assets']['js'] streams = { parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream) @@ -95,7 +93,8 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only if 'sig' in download_stream: sig = download_stream['sig'][0] else: - sig = decrypt_signature(download_stream['s'][0]) + js = get_content(html5player) + sig = decipher(js, download_stream['s'][0]) url = '%s&signature=%s' % (url, sig) type, ext, size = url_info(url) From 6386f1bbf3452161d03acfb1c3fabc41c550989e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 17 Aug 2013 15:15:28 +0800 Subject: [PATCH 60/64] version 0.3.21 --- CHANGELOG.txt | 10 ++++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index d1ae3709..67cbb1fb 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,16 @@ Changelog ========= +0.3.21 +------ + +*Date: 2013-08-17* + +* Fix issues for: + - YouTube + - YinYueTai + - pan.baidu.com + 0.3.20 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index ee61648a..43c2747b 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,5 +1,5 @@ #!/usr/bin/env python __all__ = ['__version__', '__date__'] -__version__ = '0.3.20' -__date__ = '2013-08-16' +__version__ = '0.3.21' +__date__ = '2013-08-17' From 6c537e86b83f8b38c50a01ce54968213426aa58d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Aug 2013 06:06:44 +0200 Subject: [PATCH 61/64] Tumblr: fix title --- src/you_get/downloader/tumblr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/tumblr.py b/src/you_get/downloader/tumblr.py index d3f8d374..a4c437d9 100644 --- a/src/you_get/downloader/tumblr.py +++ b/src/you_get/downloader/tumblr.py @@ -10,7 +10,7 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) html = parse.unquote(html).replace('\/', '/') - title = unescape_html(r1(r'', html)) + title = unescape_html(r1(r'', html) or r1(r'(.*)', html)) real_url = r1(r'source src=\\x22([^\\]+)\\', html) if not real_url: real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio' From 58a2dc90eecb81a1f177db4867812a89d1491421 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 21 Aug 2013 11:01:18 +0200 Subject: [PATCH 62/64] Nicovideo: fix --- src/you_get/downloader/nicovideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/nicovideo.py b/src/you_get/downloader/nicovideo.py index 144c02a1..7d384f31 100644 --- a/src/you_get/downloader/nicovideo.py +++ b/src/you_get/downloader/nicovideo.py @@ -23,7 +23,7 @@ def nicovideo_download(url, output_dir = '.', merge = True, info_only = False): nicovideo_login(user, password) html = get_html(url) # necessary! - title = unicodize(r1(r'title:\s*\'(.*)\',', html)) + title = unicodize(r1(r'<span class="videoHeaderTitle">([^<]+)</span>', html)) api_html = get_html('http://www.nicovideo.jp/api/getflv?v=%s' % url.split('/')[-1]) real_url = parse.unquote(r1(r'url=([^&]+)&', api_html)) From 70cd7b3bd9fb32b422bd62b2bfa853720face197 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 22 Aug 2013 22:25:51 +0200 Subject: [PATCH 63/64] Tumblr: fix title --- src/you_get/downloader/tumblr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/downloader/tumblr.py b/src/you_get/downloader/tumblr.py index a4c437d9..8a2e2ed1 100644 --- a/src/you_get/downloader/tumblr.py +++ b/src/you_get/downloader/tumblr.py @@ -10,7 +10,9 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) html = parse.unquote(html).replace('\/', '/') - title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or r1(r'<title>(.*)', html)) + title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or + r1(r'<meta property="og:description" content="([^"]*)" />', html) or + r1(r'<title>([^<\n]*)', html)).replace('\n', '') real_url = r1(r'source src=\\x22([^\\]+)\\', html) if not real_url: real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio' From 6f77174f3495bfc01e3d88631a23187fb6ae1138 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Sat, 24 Aug 2013 11:19:23 +0200 Subject: [PATCH 64/64] YinYueTai: fix --- src/you_get/downloader/yinyuetai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/downloader/yinyuetai.py b/src/you_get/downloader/yinyuetai.py index 2048aed9..1249845c 100644 --- a/src/you_get/downloader/yinyuetai.py +++ b/src/you_get/downloader/yinyuetai.py @@ -20,7 +20,7 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i download_urls([url], title, ext, size, output_dir, merge = merge) def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False): - id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url) + id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url) assert id html = get_html(url, 'utf-8') title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html)