diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 67cbb1fb..d0409f10 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,42 @@ Changelog ========= +0.3.24 +------ + +*Date: 2013-10-30* + +* Experimental: Sogou proxy server +* Fix issues for: + - Vimeo + +0.3.23 +------ + +*Date: 2013-10-23* + +* Support YouTube playlists +* Support general short URLs +* Fix issues for: + - Sina + +0.3.22 +------ + +*Date: 2013-10-18* + +* Fix issues for: + - Baidu + - Bilibili + - JPopsuki TV + - Niconico + - PPTV + - TED + - Tumblr + - YinYueTai + - YouTube + - ... + 0.3.21 ------ diff --git a/README.md b/README.md index 52d5d30c..d8a89bae 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # You-Get -[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) +[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [You-Get](https://github.com/soimort/you-get) is a video downloader runs on Python 3. It aims at easing the download of videos on [YouTube](http://www.youtube.com), [Youku](http://www.youku.com)/[Tudou](http://www.tudou.com) (biggest online video providers in China), [ Niconico](http://www.nicovideo.jp), etc., in one script. @@ -8,6 +8,8 @@ See the project homepage for further documentat Fork me on GitHub: +__中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%AD%E6%96%87%E8%AF%B4%E6%98%8E)。 + ## Features ### Supported Sites (As of Now) @@ -29,6 +31,7 @@ Fork me on GitHub: * SoundCloud * Mixcloud * Freesound +* JPopsuki * VID48 * Niconico (ニコニコ動画) * Youku (优酷) @@ -64,23 +67,15 @@ Fork me on GitHub: ## Installation -### 1. Install via [Pip](http://www.pip-installer.org/): +### 1. Install via Pip: - $ pip install you-get + $ [sudo] pip install you-get Check if the installation was successful: $ you-get -V -### 2. Install via [EasyInstall](http://pypi.python.org/pypi/setuptools): - - $ easy_install you-get - - Check if the installation was successful: - - $ you-get -V - -### 3. Install from Git: +### 2. Install from Git: $ git clone git://github.com/soimort/you-get.git @@ -97,7 +92,7 @@ Fork me on GitHub: $ you-get -V -### 4. Direct download (from ): +### 3. Direct download (from ): $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master $ unzip you-get.zip @@ -115,27 +110,19 @@ Fork me on GitHub: $ you-get -V -### 5. Install from [AUR (Arch User Repository)](http://aur.archlinux.org/): +### 4. Install from your distro's repo: - Click [here](https://aur.archlinux.org/packages.php\?ID=62576). +* __AUR (Arch)__: -### Upgrading: +* __Overlay (Gentoo)__: + +## Upgrading Using Pip: - $ pip install --upgrade you-get + $ [sudo] pip install --upgrade you-get -### FAQ (For Windows Users): - -* Q: I don't know how to install it on Windows. - -* A: Then don't do it. Just put your `you-get` folder into system `%PATH%`. - -* Q: I got something like `UnicodeDecodeError: 'gbk' codec can't decode byte 0xb0 in position 1012: illegal multibyte sequence`. - -* A: Run `set PYTHONIOENCODING=utf-8`. - -## Examples (For End-Users) +## Examples Display the information of the video without downloading: @@ -172,31 +159,23 @@ By default, Python will apply the system proxy settings (i.e. environment variab For a complete list of all available options, see: $ you-get --help - -## Examples (For Developers) - -In Python 3 (interactive): - - >>> from you_get.downloader import * - >>> youtube.download("http://www.youtube.com/watch?v=8bQlxQJEzLk", info_only = True) - Video Site: YouTube.com - Title: If you're good at something, never do it for free! - Type: WebM video (video/webm) - Size: 0.13 MB (133176 Bytes) + Usage: you-get [OPTION]... [URL]... - >>> import you_get - >>> you_get.any_download("http://www.youtube.com/watch?v=sGwy8DsUJ4M") - Video Site: YouTube.com - Title: Mort from Madagascar LIKES - Type: WebM video (video/webm) - Size: 1.78 MB (1867072 Bytes) + Startup options: + -V | --version Display the version and exit. + -h | --help Print this help and exit. - Downloading Mort from Madagascar LIKES.webm ... - 100.0% ( 1.8/1.8 MB) [========================================] 1/1 - -## API Reference - -See source code. + Download options (use with URLs): + -f | --force Force overwriting existed files. + -i | --info Display the information of videos without downloading. + -u | --url Display the real URLs of videos without downloading. + -n | --no-merge Don't merge video parts. + -o | --output-dir Set the output directory for downloaded videos. + -x | --http-proxy Use specific HTTP proxy for downloading. + --no-proxy Don't use any proxy. (ignore $http_proxy) + -S | --sogou Use a Sogou proxy server for downloading. + --sogou-proxy Run a standalone Sogou proxy server. + --debug Show traceback on KeyboardInterrupt. ## License @@ -205,227 +184,3 @@ You-Get is licensed under the [MIT license](https://raw.github.com/soimort/you-g ## Contributing Please see [CONTRIBUTING.md](https://github.com/soimort/you-get/blob/master/CONTRIBUTING.md). - - - -*** - - - -# You-Get - 中文说明 - -[You-Get](https://github.com/soimort/you-get)是一个基于Python 3的视频下载工具。之所以写它的主要原因是,我找不到一个现成的下载工具能够同时支持[YouTube](http://www.youtube.com/)和[优酷](http://www.youku.com/);而且,几乎所有以前的视频下载程序都是基于Python 2的。 - -项目主页: - -GitHub地址: - -## 特点 - -### 说明 - -You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/youku-lixian)用Python 3改写而成,增加了以下功能: - -* 支持YouTube、Vimeo等国外视频网站 -* 支持断点续传 -* 可设置HTTP代理 - -### 支持的站点(截至目前) - -已实现对以下站点的支持,以后会陆续增加(・∀・) - -* YouTube -* Vimeo -* Coursera -* Blip -* Dailymotion -* eHow -* Facebook -* Google+ -* Google Drive -* Khan Academy -* TED -* Tumblr -* Vine -* Instagram -* SoundCloud -* Mixcloud -* Freesound -* VID48 -* NICONICO动画 -* 优酷 -* 土豆 -* 音悦台 -* AcFun -* bilibili -* CNTV -* 豆瓣 -* 凤凰视频 -* 爱奇艺 -* 激动网 -* 酷6网 -* MioMio -* 网易视频 -* PPTV -* 腾讯视频 -* 新浪视频 -* 搜狐视频 -* 56网 -* 虾米 -* 5sing -* 百度音乐 -* 百度网盘 -* SongTaste -* Alive.in.th - -## 依赖 - -* [Python 3](http://www.python.org/download/releases/) -* __(可选)__ [FFmpeg](http://ffmpeg.org) - * 用于转换与合并视频文件。 - -## 安装说明 - -(以下命令格式均以Linux shell为例) - -### 1. 通过[Pip](http://www.pip-installer.org/)安装: - - $ pip install you-get - - 检查安装是否成功: - - $ you-get -V - -### 2. 通过[EasyInstall](http://pypi.python.org/pypi/setuptools)安装: - - $ easy_install you-get - - 检查安装是否成功: - - $ you-get -V - -### 3. 从Git安装: - - $ git clone git://github.com/soimort/you-get.git - - 在不安装的情况下直接使用脚本: - - $ cd you-get/ - $ ./you-get -V - - 若要将Python package安装到系统默认路径,执行: - - $ make install - - 检查安装是否成功: - - $ you-get -V - -### 4. 直接下载(从): - - $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master - $ unzip you-get.zip - - 在不安装的情况下直接使用脚本: - - $ cd soimort-you-get-*/ - $ ./you-get -V - - 若要将Python package安装到系统默认路径,执行: - - $ make install - - 检查安装是否成功: - - $ you-get -V - -### 5. 从[AUR (Arch User Repository)](http://aur.archlinux.org/)安装: - - 点击[这里](https://aur.archlinux.org/packages.php\?ID=62576)。 - -### 升级: - -使用Pip: - - $ pip install --upgrade you-get - -### FAQ(针对Windows用户): - -* Q:我不知道该如何在Windows下安装。 - -* A:不需要安装。直接把`you-get`目录放到系统`%PATH%`中。 - -* Q:出现错误提示`UnicodeDecodeError: 'gbk' codec can't decode byte 0xb0 in position 1012: illegal multibyte sequence`。 - -* A:执行`set PYTHONIOENCODING=utf-8`。 - -## 使用方法示例 - -### 如何下载视频 - -显示视频信息,但不进行下载(`-i`或`--info`选项): - - $ you-get -i http://www.yinyuetai.com/video/463772 - -下载视频: - - $ you-get http://www.yinyuetai.com/video/463772 - -下载多个视频: - - $ you-get http://www.yinyuetai.com/video/463772 http://www.yinyuetai.com/video/471500 - -若当前目录下已有与视频标题同名的文件,下载时会自动跳过。若有同名的`.download`临时文件,程序会从上次中断处开始下载。 -如要强制重新下载该视频,可使用`-f`(`--force`)选项: - - $ you-get -f http://www.yinyuetai.com/video/463772 - -`-l`(`--playlist`)选项用于下载播放列表(只对某些网站适用): - - $ you-get -l http://www.youku.com/playlist_show/id_5344313.html - -__注:从0.1.3以后的版本起,`-l`选项不再必须。You-Get可以自动识别并处理播放列表的下载。__ - -指定视频文件的下载目录: - - $ you-get -o ~/Downloads http://www.yinyuetai.com/video/463772 - -显示详细帮助: - - $ you-get -h - -### 如何设置代理 - -默认情况下,Python自动使用系统的代理配置。可以通过环境变量`http_proxy`来设置系统的HTTP代理。 - -`-x`(`--http-proxy`)选项用于手动指定You-Get所使用的HTTP代理。例如:GoAgent的代理服务器是`http://127.0.0.1:8087`,则通过该代理下载某YouTube视频的命令是: - - $ you-get -x 127.0.0.1:8087 http://www.youtube.com/watch?v=KbtO_Ayjw0M - -Windows下的自由门等翻墙软件会自动设置系统全局代理,因此无需指定HTTP代理即可下载YouTube视频: - - $ you-get http://www.youtube.com/watch?v=KbtO_Ayjw0M - -如果不希望程序在下载过程中使用任何代理(包括系统的代理配置),可以显式地指定`--no-proxy`选项: - - $ you-get --no-proxy http://v.youku.com/v_show/id_XMjI0ODc1NTc2.html - -### 断点续传 - -下载未完成时被中止(因为`Ctrl+C`终止程序或者网络中断等原因),在目标路径中会有一个扩展名为`.download`的临时文件。 - -下次运行只要在目标路径中找到相应的`.download`临时文件,程序会自动从中断处继续下载。(除非指定了`-f`选项) - -## 使用Python 2? - -优酷等国内视频网站的下载,请移步:[iambus/youku-lixian](https://github.com/iambus/youku-lixian) - -YouTube等国外视频网站的下载,请移步:[rg3/youtube-dl](https://github.com/rg3/youtube-dl) - -## 许可证 - -You-Get在[MIT License](https://raw.github.com/soimort/you-get/master/LICENSE.txt)下发布。 - -## 如何参与贡献 / 报告issue - -请阅读 [CONTRIBUTING.md](https://github.com/soimort/you-get/blob/master/CONTRIBUTING.md)。 diff --git a/README.txt b/README.txt index 7bd8a1ee..02a9408e 100644 --- a/README.txt +++ b/README.txt @@ -3,6 +3,8 @@ You-Get .. image:: https://api.travis-ci.org/soimort/you-get.png +.. image:: https://badge.fury.io/py/you-get.png + `You-Get `_ is a video downloader runs on Python 3. It aims at easing the download of videos on `YouTube `_, `Youku `_/`Tudou `_ (biggest online video providers in China), `Niconico `_, etc., in one script. See the project homepage http://www.soimort.org/you-get for further documentation. @@ -32,6 +34,7 @@ Supported Sites (As of Now) * SoundCloud http://soundcloud.com * Mixcloud http://www.mixcloud.com * Freesound http://www.freesound.org +* JPopsuki http://jpopsuki.tv * VID48 http://vid48.com * Niconico (ニコニコ動画) http://www.nicovideo.jp * Youku (优酷) http://www.youku.com @@ -69,17 +72,9 @@ Dependencies Installation ------------ -#) Install via `Pip `_:: +#) Install via Pip:: - $ pip install you-get - - Check if the installation was successful:: - - $ you-get -V - -#) Install via `EasyInstall `_:: - - $ easy_install you-get + $ [sudo] pip install you-get Check if the installation was successful:: @@ -120,12 +115,21 @@ Installation $ you-get -V -#) Install from `AUR (Arch User Repository) `_: +#) Install from your distro's repo: - Click `here `_. +* `AUR (Arch) `_ -Examples (For End-Users) ------------------------- +* `Overlay (Gentoo) `_ + +Upgrading +--------- + +Using Pip:: + + $ [sudo] pip install --upgrade you-get + +Examples +-------- Display the information of the video without downloading:: @@ -163,33 +167,23 @@ Command-Line Options For a complete list of all available options, see:: $ you-get --help - -Examples (For Developers) -------------------------- - -In Python 3 (interactive):: - - >>> from you_get.downloader import * - >>> youtube.download("http://www.youtube.com/watch?v=8bQlxQJEzLk", info_only = True) - Video Site: YouTube.com - Title: If you're good at something, never do it for free! - Type: WebM video (video/webm) - Size: 0.13 MB (133176 Bytes) + Usage: you-get [OPTION]... [URL]... - >>> import you_get - >>> you_get.any_download("http://www.youtube.com/watch?v=sGwy8DsUJ4M") - Video Site: YouTube.com - Title: Mort from Madagascar LIKES - Type: WebM video (video/webm) - Size: 1.78 MB (1867072 Bytes) + Startup options: + -V | --version Display the version and exit. + -h | --help Print this help and exit. - Downloading Mort from Madagascar LIKES.webm ... - 100.0% ( 1.8/1.8 MB) [========================================] 1/1 - -API Reference -------------- - -See source code. + Download options (use with URLs): + -f | --force Force overwriting existed files. + -i | --info Display the information of videos without downloading. + -u | --url Display the real URLs of videos without downloading. + -n | --no-merge Don't merge video parts. + -o | --output-dir Set the output directory for downloaded videos. + -x | --http-proxy Use specific HTTP proxy for downloading. + --no-proxy Don't use any proxy. (ignore $http_proxy) + -S | --sogou Use a Sogou proxy server for downloading. + --sogou-proxy Run a standalone Sogou proxy server. + --debug Show traceback on KeyboardInterrupt. License ------- diff --git a/src/you_get/__init__.py b/src/you_get/__init__.py index ecca35d2..f8ee6011 100644 --- a/src/you_get/__init__.py +++ b/src/you_get/__init__.py @@ -3,7 +3,5 @@ from .common import * from .version import * -# Easy import -#from .cli_wrapper.converter import * -#from .cli_wrapper.player import * -from .downloader import * +from .cli_wrapper import * +from .extractor import * diff --git a/src/you_get/cli_wrapper/__init__.py b/src/you_get/cli_wrapper/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/downloader/__init__.py b/src/you_get/cli_wrapper/downloader/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/openssl/__init__.py b/src/you_get/cli_wrapper/openssl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/__init__.py b/src/you_get/cli_wrapper/player/__init__.py new file mode 100644 index 00000000..2f7636de --- /dev/null +++ b/src/you_get/cli_wrapper/player/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python + +from .mplayer import * diff --git a/src/you_get/cli_wrapper/player/__main__.py b/src/you_get/cli_wrapper/player/__main__.py new file mode 100644 index 00000000..8d4958b9 --- /dev/null +++ b/src/you_get/cli_wrapper/player/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +def main(): + script_main('you-get', any_download, any_download_playlist) + +if __name__ == "__main__": + main() diff --git a/src/you_get/cli_wrapper/player/dragonplayer.py b/src/you_get/cli_wrapper/player/dragonplayer.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/gnome_mplayer.py b/src/you_get/cli_wrapper/player/gnome_mplayer.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/mplayer.py b/src/you_get/cli_wrapper/player/mplayer.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/vlc.py b/src/you_get/cli_wrapper/player/vlc.py new file mode 100644 index 00000000..4265cc3e --- /dev/null +++ b/src/you_get/cli_wrapper/player/vlc.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/src/you_get/cli_wrapper/player/wmp.py b/src/you_get/cli_wrapper/player/wmp.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/__init__.py b/src/you_get/cli_wrapper/transcoder/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/ffmpeg.py b/src/you_get/cli_wrapper/transcoder/ffmpeg.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/libav.py b/src/you_get/cli_wrapper/transcoder/libav.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/mencoder.py b/src/you_get/cli_wrapper/transcoder/mencoder.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/common.py b/src/you_get/common.py index 8faf907a..25df1090 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -8,11 +8,15 @@ import re import sys from urllib import request, parse import platform +import threading from .version import __version__ +from .util import log, legitimize, sogou_proxy_server dry_run = False force = False +sogou_proxy = None +sogou_env = None fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -85,12 +89,15 @@ def parse_query_param(url, param): The value of the parameter. """ - return parse.parse_qs(parse.urlparse(url).query)[param][0] + try: + return parse.parse_qs(parse.urlparse(url).query)[param][0] + except: + return None def unicodize(text): return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text) -# DEPRECATED in favor of filenameable() +# DEPRECATED in favor of util.legitimize() def escape_file_path(path): path = path.replace('/', '-') path = path.replace('\\', '-') @@ -98,6 +105,7 @@ def escape_file_path(path): path = path.replace('?', '-') return path +# DEPRECATED in favor of util.legitimize() def filenameable(text): """Converts a string to a legal filename through various OSes. """ @@ -106,11 +114,7 @@ def filenameable(text): 0: None, ord('/'): '-', }) - if platform.system() == 'Darwin': # For Mac OS - text = text.translate({ - ord(':'): '-', - }) - elif platform.system() == 'Windows': # For Windows + if platform.system() == 'Windows': # For Windows text = text.translate({ ord(':'): '-', ord('*'): '-', @@ -124,6 +128,13 @@ def filenameable(text): ord('['): '(', ord(']'): ')', }) + else: + if text.startswith("."): + text = text[1:] + if platform.system() == 'Darwin': # For Mac OS + text = text.translate({ + ord(':'): '-', + }) return text def unescape_html(html): @@ -146,7 +157,8 @@ def undeflate(data): (the zlib compression is used.) """ import zlib - return zlib.decompress(data, -zlib.MAX_WBITS) + decompressobj = zlib.decompressobj(-zlib.MAX_WBITS) + return decompressobj.decompress(data)+decompressobj.flush() # DEPRECATED in favor of get_content() def get_response(url, faker = False): @@ -501,7 +513,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, traceback.print_exc(file = sys.stdout) pass - title = filenameable(title) + title = legitimize(title) filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) @@ -577,7 +589,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer assert ext in ('ts') - title = filenameable(title) + title = legitimize(title) filename = '%s.%s' % (title, 'ts') filepath = os.path.join(output_dir, filename) @@ -703,9 +715,38 @@ def print_info(site_info, title, type, size): print("Video Site:", site_info) print("Title: ", tr(title)) print("Type: ", type_info) - print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)") + print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)") print() +def parse_host(host): + """Parses host name and port number from a string. + """ + if re.match(r'^(\d+)$', host) is not None: + return ("0.0.0.0", int(host)) + if re.match(r'^(\w+)://', host) is None: + host = "//" + host + o = parse.urlparse(host) + hostname = o.hostname or "0.0.0.0" + port = o.port or 0 + return (hostname, port) + +def get_sogou_proxy(): + return sogou_proxy + +def set_proxy(proxy): + proxy_handler = request.ProxyHandler({ + 'http': '%s:%s' % proxy, + 'https': '%s:%s' % proxy, + }) + opener = request.build_opener(proxy_handler) + request.install_opener(opener) + +def unset_proxy(): + proxy_handler = request.ProxyHandler({}) + opener = request.build_opener(proxy_handler) + request.install_opener(opener) + +# DEPRECATED in favor of set_proxy() and unset_proxy() def set_http_proxy(proxy): if proxy == None: # Use system default setting proxy_support = request.ProxyHandler() @@ -751,13 +792,15 @@ def script_main(script_name, download, download_playlist = None): -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. -o | --output-dir Set the output directory for downloaded videos. - -x | --http-proxy Use specific HTTP proxy for downloading. + -x | --http-proxy Use specific HTTP proxy for downloading. --no-proxy Don't use any proxy. (ignore $http_proxy) + -S | --sogou Use a Sogou proxy server for downloading. + --sogou-proxy Run a standalone Sogou proxy server. --debug Show traceback on KeyboardInterrupt. ''' - short_opts = 'Vhfiuno:x:' - opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'output-dir=', 'http-proxy='] + short_opts = 'VhfiunSo:x:' + opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -765,10 +808,15 @@ def script_main(script_name, download, download_playlist = None): try: opts, args = getopt.getopt(sys.argv[1:], short_opts, opts) except getopt.GetoptError as err: - print(err) - print(help) + log.e(err) + log.e("try 'you-get --help' for more options") sys.exit(2) + global force + global dry_run + global sogou_proxy + global sogou_env + info_only = False playlist = False merge = True @@ -784,12 +832,10 @@ def script_main(script_name, download, download_playlist = None): print(help) sys.exit() elif o in ('-f', '--force'): - global force force = True elif o in ('-i', '--info'): info_only = True elif o in ('-u', '--url'): - global dry_run dry_run = True elif o in ('-l', '--playlist'): playlist = True @@ -803,19 +849,38 @@ def script_main(script_name, download, download_playlist = None): output_dir = a elif o in ('-x', '--http-proxy'): proxy = a + elif o in ('-S', '--sogou'): + sogou_proxy = ("0.0.0.0", 0) + elif o in ('--sogou-proxy'): + sogou_proxy = parse_host(a) + elif o in ('--sogou-env'): + sogou_env = a + else: + log.e("try 'you-get --help' for more options") + sys.exit(2) + if not args: + if sogou_proxy is not None: + try: + if sogou_env is not None: + server = sogou_proxy_server(sogou_proxy, network_env=sogou_env) + else: + server = sogou_proxy_server(sogou_proxy) + server.serve_forever() + except KeyboardInterrupt: + if traceback: + raise + else: + sys.exit() else: print(help) - sys.exit(1) - if not args: - print(help) - sys.exit() + sys.exit() set_http_proxy(proxy) - - if traceback: + + try: download_main(download, download_playlist, args, playlist, output_dir, merge, info_only) - else: - try: - download_main(download, download_playlist, args, playlist, output_dir, merge, info_only) - except KeyboardInterrupt: + except KeyboardInterrupt: + if traceback: + raise + else: sys.exit(1) diff --git a/src/you_get/downloader/baidu.py b/src/you_get/downloader/baidu.py deleted file mode 100755 index 79d7053d..00000000 --- a/src/you_get/downloader/baidu.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__all__ = ['baidu_download'] - -from ..common import * -from .. import common - -from urllib import parse - -def baidu_get_song_html(sid): - return get_html('http://music.baidu.com/song/%s/download?__o=%%2Fsong%%2F%s' % (sid, sid), faker = True) - -def baidu_get_song_url(html): - return r1(r'downlink="/data/music/file\?link=(.+?)"', html) - -def baidu_get_song_artist(html): - return r1(r'singer_name:"(.+?)"', html) - -def baidu_get_song_album(html): - return r1(r'ablum_name:"(.+?)"', html) - -def baidu_get_song_title(html): - return r1(r'song_title:"(.+?)"', html) - -def baidu_download_lyric(sid, file_name, output_dir): - if common.dry_run: - return - - html = get_html('http://music.baidu.com/song/' + sid) - href = r1(r'', html) - if href: - lrc = get_html('http://music.baidu.com' + href) - if len(lrc) > 0: - with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w') as x: - x.write(lrc) - -def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False): - html = baidu_get_song_html(sid) - url = baidu_get_song_url(html) - title = baidu_get_song_title(html) - artist = baidu_get_song_artist(html) - album = baidu_get_song_album(html) - type, ext, size = url_info(url, faker = True) - print_info(site_info, title, type, size) - if not info_only: - file_name = "%s - %s - %s" % (title, album, artist) - download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) - baidu_download_lyric(sid, file_name, output_dir) - -def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False): - html = get_html('http://music.baidu.com/album/%s' % aid, faker = True) - album_name = r1(r'

(.+?)<\/h2>', html) - artist = r1(r'', html) - output_dir = '%s/%s - %s' % (output_dir, artist, album_name) - ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids'] - track_nr = 1 - for id in ids: - song_html = baidu_get_song_html(id) - song_url = baidu_get_song_url(song_html) - song_title = baidu_get_song_title(song_html) - file_name = '%02d.%s' % (track_nr, song_title) - type, ext, size = url_info(song_url, faker = True) - print_info(site_info, song_title, type, size) - if not info_only: - download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True) - baidu_download_lyric(id, file_name, output_dir) - track_nr += 1 - -def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): - if re.match(r'http://pan.baidu.com', url): - html = get_html(url) - - title = r1(r'server_filename="([^"]+)"', html) - if len(title.split('.')) > 1: - title = ".".join(title.split('.')[:-1]) - - real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/') - type, ext, size = url_info(real_url, faker = True) - - print_info(site_info, title, ext, size) - if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) - - elif re.match(r'http://music.baidu.com/album/\d+', url): - id = r1(r'http://music.baidu.com/album/(\d+)', url) - baidu_download_album(id, output_dir, merge, info_only) - - elif re.match('http://music.baidu.com/song/\d+', url): - id = r1(r'http://music.baidu.com/song/(\d+)', url) - baidu_download_song(id, output_dir, merge, info_only) - -site_info = "Baidu.com" -download = baidu_download -download_playlist = playlist_not_supported("baidu") diff --git a/src/you_get/downloader/__init__.py b/src/you_get/extractor/__init__.py similarity index 97% rename from src/you_get/downloader/__init__.py rename to src/you_get/extractor/__init__.py index 99e331f4..874824fe 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/extractor/__init__.py @@ -18,6 +18,7 @@ from .ifeng import * from .instagram import * from .iqiyi import * from .joy import * +from .jpopsuki import * from .ku6 import * from .miomio import * from .mixcloud import * diff --git a/src/you_get/downloader/__main__.py b/src/you_get/extractor/__main__.py similarity index 69% rename from src/you_get/downloader/__main__.py rename to src/you_get/extractor/__main__.py index ed07f702..f8d9ecea 100644 --- a/src/you_get/downloader/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -1,18 +1,17 @@ #!/usr/bin/env python __all__ = ['main', 'any_download', 'any_download_playlist'] -from ..downloader import * +from ..extractor import * from ..common import * def url_to_module(url): - site = r1(r'http://([^/]+)/', url) - assert site, 'invalid url: ' + url + video_host = r1(r'http://([^/]+)/', url) + video_url = r1(r'http://[^/]+(.*)', url) + assert video_host and video_url, 'invalid url: ' + url - if site.endswith('.com.cn'): - site = site[:-3] - domain = r1(r'(\.[^.]+\.[^.]+)$', site) - if not domain: - domain = site + if video_host.endswith('.com.cn'): + video_host = video_host[:-3] + domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host assert domain, 'unsupported url: ' + url k = r1(r'([^.]+)', domain) @@ -38,6 +37,7 @@ def url_to_module(url): 'instagram': instagram, 'iqiyi': iqiyi, 'joy': joy, + 'jpopsuki': jpopsuki, 'kankanews': bilibili, 'ku6': ku6, 'miomio': miomio, @@ -67,14 +67,28 @@ def url_to_module(url): if k in downloads: return downloads[k] else: - raise NotImplementedError(url) + import http.client + conn = http.client.HTTPConnection(video_host) + conn.request("HEAD", video_url) + res = conn.getresponse() + location = res.getheader('location') + if location is None: + raise NotImplementedError(url) + else: + return url_to_module(location), location def any_download(url, output_dir = '.', merge = True, info_only = False): - m = url_to_module(url) + try: + m, url = url_to_module(url) + except: + m = url_to_module(url) m.download(url, output_dir = output_dir, merge = merge, info_only = info_only) def any_download_playlist(url, output_dir = '.', merge = True, info_only = False): - m = url_to_module(url) + try: + m, url = url_to_module(url) + except: + m = url_to_module(url) m.download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only) def main(): diff --git a/src/you_get/downloader/acfun.py b/src/you_get/extractor/acfun.py similarity index 100% rename from src/you_get/downloader/acfun.py rename to src/you_get/extractor/acfun.py diff --git a/src/you_get/downloader/alive.py b/src/you_get/extractor/alive.py similarity index 100% rename from src/you_get/downloader/alive.py rename to src/you_get/extractor/alive.py diff --git a/src/you_get/extractor/baidu.py b/src/you_get/extractor/baidu.py new file mode 100755 index 00000000..b93b0333 --- /dev/null +++ b/src/you_get/extractor/baidu.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__all__ = ['baidu_download'] + +from ..common import * +from .. import common + +from urllib import parse + +def baidu_get_song_data(sid): + data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data'] + + if data['xcode'] != '': + # inside china mainland + return data['songList'][0] + else: + # outside china mainland + html = get_html("http://music.baidu.com/song/%s" % sid) + + # baidu pan link + sourceLink = r1(r'"link-src-info">(.+?)<\/h2>', html) + artist = r1(r'', html) + output_dir = '%s/%s - %s' % (output_dir, artist, album_name) + ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids'] + track_nr = 1 + for id in ids: + song_data = baidu_get_song_data(id) + song_url = baidu_get_song_url(song_data) + song_title = baidu_get_song_title(song_data) + song_lrc = baidu_get_song_lyric(song_data) + file_name = '%02d.%s' % (track_nr, song_title) + + type, ext, size = url_info(song_url, faker = True) + print_info(site_info, song_title, type, size) + if not info_only: + download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True) + + if song_lrc: + type, ext, size = url_info(song_lrc, faker = True) + print_info(site_info, song_title, type, size) + if not info_only: + download_urls([song_lrc], file_name, ext, size, output_dir, faker = True) + + track_nr += 1 + +def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): + if re.match(r'http://pan.baidu.com', url): + html = get_html(url) + + title = r1(r'server_filename="([^"]+)"', html) + if len(title.split('.')) > 1: + title = ".".join(title.split('.')[:-1]) + + real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/') + type, ext, size = url_info(real_url, faker = True) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([real_url], title, ext, size, output_dir, merge = merge) + + elif re.match(r'http://music.baidu.com/album/\d+', url): + id = r1(r'http://music.baidu.com/album/(\d+)', url) + baidu_download_album(id, output_dir, merge, info_only) + + elif re.match('http://music.baidu.com/song/\d+', url): + id = r1(r'http://music.baidu.com/song/(\d+)', url) + baidu_download_song(id, output_dir, merge, info_only) + +site_info = "Baidu.com" +download = baidu_download +download_playlist = playlist_not_supported("baidu") diff --git a/src/you_get/downloader/bilibili.py b/src/you_get/extractor/bilibili.py similarity index 97% rename from src/you_get/downloader/bilibili.py rename to src/you_get/extractor/bilibili.py index 8512d362..322b60fc 100644 --- a/src/you_get/downloader/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -83,7 +83,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars t, id = flashvars.split('=', 1) id = id.split('&')[0] diff --git a/src/you_get/downloader/blip.py b/src/you_get/extractor/blip.py similarity index 100% rename from src/you_get/downloader/blip.py rename to src/you_get/extractor/blip.py diff --git a/src/you_get/downloader/cntv.py b/src/you_get/extractor/cntv.py similarity index 100% rename from src/you_get/downloader/cntv.py rename to src/you_get/extractor/cntv.py diff --git a/src/you_get/downloader/coursera.py b/src/you_get/extractor/coursera.py similarity index 100% rename from src/you_get/downloader/coursera.py rename to src/you_get/extractor/coursera.py diff --git a/src/you_get/downloader/dailymotion.py b/src/you_get/extractor/dailymotion.py similarity index 100% rename from src/you_get/downloader/dailymotion.py rename to src/you_get/extractor/dailymotion.py diff --git a/src/you_get/downloader/douban.py b/src/you_get/extractor/douban.py similarity index 100% rename from src/you_get/downloader/douban.py rename to src/you_get/extractor/douban.py diff --git a/src/you_get/downloader/ehow.py b/src/you_get/extractor/ehow.py similarity index 100% rename from src/you_get/downloader/ehow.py rename to src/you_get/extractor/ehow.py diff --git a/src/you_get/downloader/facebook.py b/src/you_get/extractor/facebook.py similarity index 100% rename from src/you_get/downloader/facebook.py rename to src/you_get/extractor/facebook.py diff --git a/src/you_get/downloader/fivesing.py b/src/you_get/extractor/fivesing.py similarity index 100% rename from src/you_get/downloader/fivesing.py rename to src/you_get/extractor/fivesing.py diff --git a/src/you_get/downloader/freesound.py b/src/you_get/extractor/freesound.py similarity index 100% rename from src/you_get/downloader/freesound.py rename to src/you_get/extractor/freesound.py diff --git a/src/you_get/downloader/google.py b/src/you_get/extractor/google.py similarity index 100% rename from src/you_get/downloader/google.py rename to src/you_get/extractor/google.py diff --git a/src/you_get/downloader/ifeng.py b/src/you_get/extractor/ifeng.py similarity index 100% rename from src/you_get/downloader/ifeng.py rename to src/you_get/extractor/ifeng.py diff --git a/src/you_get/downloader/instagram.py b/src/you_get/extractor/instagram.py similarity index 100% rename from src/you_get/downloader/instagram.py rename to src/you_get/extractor/instagram.py diff --git a/src/you_get/downloader/iqiyi.py b/src/you_get/extractor/iqiyi.py similarity index 100% rename from src/you_get/downloader/iqiyi.py rename to src/you_get/extractor/iqiyi.py diff --git a/src/you_get/downloader/joy.py b/src/you_get/extractor/joy.py similarity index 100% rename from src/you_get/downloader/joy.py rename to src/you_get/extractor/joy.py diff --git a/src/you_get/extractor/jpopsuki.py b/src/you_get/extractor/jpopsuki.py new file mode 100644 index 00000000..cf4ec052 --- /dev/null +++ b/src/you_get/extractor/jpopsuki.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +__all__ = ['jpopsuki_download'] + +from ..common import * + +def jpopsuki_download(url, output_dir='.', merge=True, info_only=False): + html = get_html(url, faker=True) + + title = r1(r']+>([^<>]+)', xml) rid = r1(r'rid="([^"]+)"', xml) title = r1(r'nm="([^"]+)"', xml) - pieces = re.findall(']+fs="(\d+)"', xml) numbers, fs = zip(*pieces) urls = ['http://%s/%s/%s?k=%s' % (host, i, rid, key) for i in numbers] total_size = sum(map(int, fs)) diff --git a/src/you_get/downloader/qq.py b/src/you_get/extractor/qq.py similarity index 100% rename from src/you_get/downloader/qq.py rename to src/you_get/extractor/qq.py diff --git a/src/you_get/downloader/sina.py b/src/you_get/extractor/sina.py similarity index 96% rename from src/you_get/downloader/sina.py rename to src/you_get/extractor/sina.py index af030a9e..33cc0c7c 100644 --- a/src/you_get/downloader/sina.py +++ b/src/you_get/extractor/sina.py @@ -5,7 +5,7 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] from ..common import * def video_info(id): - xml = get_content('http://v.iask.com/v_play.php?vid=%s' % id, decoded=True) + xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, decoded=True) urls = re.findall(r'(?:)?', xml) name = match1(xml, r'(?:)?') vstr = match1(xml, r'(?:)?') diff --git a/src/you_get/downloader/sohu.py b/src/you_get/extractor/sohu.py similarity index 70% rename from src/you_get/downloader/sohu.py rename to src/you_get/extractor/sohu.py index 4400836a..a084f116 100644 --- a/src/you_get/downloader/sohu.py +++ b/src/you_get/extractor/sohu.py @@ -12,8 +12,19 @@ def real_url(host, prot, file, new): return '%s%s?key=%s' % (start[:-1], new, key) def sohu_download(url, output_dir = '.', merge = True, info_only = False): - vid = r1('vid\s*=\s*"(\d+)"', get_html(url)) - + html = get_html(url) + vid = r1('vid\s*=\s*"(\d+)"', html) + if not vid: + vid = r1('vid\s*:\s*"(\d+)"', html) + + # Open Sogou proxy if required + if get_sogou_proxy() is not None: + server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w')) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + set_proxy(server.server_address) + if vid: data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: @@ -31,10 +42,13 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): for file, new in zip(data['clipsURL'], data['su']): urls.append(real_url(host, prot, file, new)) assert data['clipsURL'][0].endswith('.mp4') - + else: - vid = r1('vid\s*=\s*\'(\d+)\'', get_html(url)) - data = json.loads(get_decoded_html('http://my.tv.sohu.com/videinfo.jhtml?m=viewnew&vid=%s' % vid)) + if re.match(r'http://share.vrs.sohu.com', url): + vid = r1('id=(\d+)', url) + else: + vid = r1('vid\s*=\s*\'(\d+)\'', get_html(url)) + data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) host = data['allot'] prot = data['prot'] urls = [] @@ -45,7 +59,12 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): for file, new in zip(data['clipsURL'], data['su']): urls.append(real_url(host, prot, file, new)) assert data['clipsURL'][0].endswith('.mp4') - + + # Close Sogou proxy if required + if get_sogou_proxy() is not None: + server.shutdown() + unset_proxy() + print_info(site_info, title, 'mp4', size) if not info_only: download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge) diff --git a/src/you_get/downloader/songtaste.py b/src/you_get/extractor/songtaste.py similarity index 100% rename from src/you_get/downloader/songtaste.py rename to src/you_get/extractor/songtaste.py diff --git a/src/you_get/downloader/soundcloud.py b/src/you_get/extractor/soundcloud.py similarity index 100% rename from src/you_get/downloader/soundcloud.py rename to src/you_get/extractor/soundcloud.py diff --git a/src/you_get/downloader/ted.py b/src/you_get/extractor/ted.py similarity index 95% rename from src/you_get/downloader/ted.py rename to src/you_get/extractor/ted.py index 167da2a8..23a7054c 100644 --- a/src/you_get/downloader/ted.py +++ b/src/you_get/extractor/ted.py @@ -11,7 +11,7 @@ def ted_download(url, output_dir = '.', merge = True, info_only = False): title = line.replace("", "").replace("", "").replace("\t", "") title = title[:title.find(' | ')] if line.find("no-flash-video-download") > -1: - url = line.replace(' 0: vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]}) temp = max(vids, key=lambda x:x["size"]) diff --git a/src/you_get/downloader/tumblr.py b/src/you_get/extractor/tumblr.py similarity index 100% rename from src/you_get/downloader/tumblr.py rename to src/you_get/extractor/tumblr.py diff --git a/src/you_get/downloader/vid48.py b/src/you_get/extractor/vid48.py similarity index 100% rename from src/you_get/downloader/vid48.py rename to src/you_get/extractor/vid48.py diff --git a/src/you_get/downloader/vimeo.py b/src/you_get/extractor/vimeo.py similarity index 59% rename from src/you_get/downloader/vimeo.py rename to src/you_get/extractor/vimeo.py index 1a18dfbb..60611f74 100644 --- a/src/you_get/downloader/vimeo.py +++ b/src/you_get/extractor/vimeo.py @@ -5,19 +5,16 @@ __all__ = ['vimeo_download', 'vimeo_download_by_id'] from ..common import * def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - html = get_html('http://vimeo.com/%s' % id, faker = True) + video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) + title = r1(r'([^<]+)', video_page) + info = dict(re.findall(r'"([^"]+)":\{[^{]+"url":"([^"]+)"', video_page)) + for quality in ['hd', 'sd', 'mobile']: + if quality in info: + url = info[quality] + break + assert url - signature = r1(r'"signature":"([^"]+)"', html) - timestamp = r1(r'"timestamp":([^,]+)', html) - hd = r1(r',"hd":(\d+),', html) - - title = r1(r'"title":"([^"]+)"', html) - title = escape_file_path(title) - - url = 'http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s' % (id, signature, timestamp) - if hd == "1": - url += '&quality=hd' - type, ext, size = url_info(url, faker = True) + type, ext, size = url_info(url, faker=True) print_info(site_info, title, type, size) if not info_only: diff --git a/src/you_get/downloader/vine.py b/src/you_get/extractor/vine.py similarity index 100% rename from src/you_get/downloader/vine.py rename to src/you_get/extractor/vine.py diff --git a/src/you_get/downloader/w56.py b/src/you_get/extractor/w56.py similarity index 100% rename from src/you_get/downloader/w56.py rename to src/you_get/extractor/w56.py diff --git a/src/you_get/downloader/xiami.py b/src/you_get/extractor/xiami.py similarity index 100% rename from src/you_get/downloader/xiami.py rename to src/you_get/extractor/xiami.py diff --git a/src/you_get/downloader/yinyuetai.py b/src/you_get/extractor/yinyuetai.py similarity index 100% rename from src/you_get/downloader/yinyuetai.py rename to src/you_get/extractor/yinyuetai.py diff --git a/src/you_get/downloader/youku.py b/src/you_get/extractor/youku.py similarity index 94% rename from src/you_get/downloader/youku.py rename to src/you_get/extractor/youku.py index 20c79c4d..4abedc97 100644 --- a/src/you_get/downloader/youku.py +++ b/src/you_get/extractor/youku.py @@ -121,7 +121,21 @@ def file_type_of_url(url): return str(re.search(r'/st/([^/]+)/', url).group(1)) def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False): + # Open Sogou proxy if required + if get_sogou_proxy() is not None: + server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w')) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + set_proxy(server.server_address) + info = get_info(id) + + # Close Sogou proxy if required + if get_sogou_proxy() is not None: + server.shutdown() + unset_proxy() + urls, sizes = zip(*find_video(info, stream_type)) ext = file_type_of_url(urls[0]) total_size = sum(sizes) diff --git a/src/you_get/downloader/youtube.py b/src/you_get/extractor/youtube.py similarity index 89% rename from src/you_get/downloader/youtube.py rename to src/you_get/extractor/youtube.py index 51fbb07f..9ff08e07 100644 --- a/src/you_get/downloader/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -45,7 +45,7 @@ def decipher(js, s): code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code) return code - f1 = match1(js, r'g.sig\|\|(\w+)\(g.s\)') + f1 = match1(js, r'\w+\.sig\|\|(\w+)\(\w+\.\w+\)') f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1) code = tr_js(f1def) f2 = match1(f1def, r'(\w+)\(\w+,\d+\)') @@ -103,14 +103,30 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) +def youtube_list_download_by_id(list_id, title=None, output_dir='.', merge=True, info_only=False): + """Downloads a YouTube video list by its unique id. + """ + + video_page = get_content('http://www.youtube.com/playlist?list=%s' % list_id) + ids = set(re.findall(r''): '-', + ord('['): '(', + ord(']'): ')', + }) + else: + # *nix + if os == 'Darwin': + # Mac OS HFS+ + text = text.translate({ + ord(':'): '-', + }) + + # Remove leading . + if text.startswith("."): + text = text[1:] + + return text diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py new file mode 100644 index 00000000..c28fd4e7 --- /dev/null +++ b/src/you_get/util/log.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python + +from ..version import __name__ + +import os, sys + +# Is terminal ANSI/VT100 compatible +if os.getenv('TERM') in ( + 'xterm', + 'vt100', + 'linux', + 'eterm-color', + 'screen', + ): + has_colors = True +else: + try: + # Eshell + ppid = os.getppid() + has_colors = (os.popen('ps -p %d -ocomm=' % ppid).read().strip() + == 'emacs') + except: + has_colors = False + +# ANSI/VT100 escape code +# http://en.wikipedia.org/wiki/ANSI_escape_code +colors = { + 'none': '', + 'reset': '\033[0m', + + 'black': '\033[30m', + 'bold-black': '\033[30;1m', + 'dark-gray': '\033[90m', + 'bold-dark-gray': '\033[90;1m', + + 'red': '\033[31m', + 'bold-red': '\033[31;1m', + 'light-red': '\033[91m', + 'bold-light-red': '\033[91;1m', + + 'green': '\033[32m', + 'bold-green': '\033[32;1m', + 'light-green': '\033[92m', + 'bold-light-green': '\033[92;1m', + + 'yellow': '\033[33m', + 'bold-yellow': '\033[33;1m', + 'light-yellow': '\033[93m', + 'bold-light-yellow': '\033[93;1m', + + 'blue': '\033[34m', + 'bold-blue': '\033[34;1m', + 'light-blue': '\033[94m', + 'bold-light-blue': '\033[94;1m', + + 'magenta': '\033[35m', + 'bold-magenta': '\033[35;1m', + 'light-magenta': '\033[95m', + 'bold-light-magenta': '\033[95;1m', + + 'cyan': '\033[36m', + 'bold-cyan': '\033[36;1m', + 'light-cyan': '\033[96m', + 'bold-light-cyan': '\033[96;1m', + + 'light-gray': '\033[37m', + 'bold-light-gray': '\033[37;1m', + 'white': '\033[97m', + 'bold-white': '\033[97;1m', +} + +def underlined(text): + """Returns an underlined text. + """ + return "\33[4m%s\33[24m" % text if has_colors else text + +def println(text, color=None, ostream=sys.stdout): + """Prints a text line to stream. + """ + if has_colors and color in colors: + ostream.write("{0}{1}{2}\n".format(colors[color], text, colors['reset'])) + else: + ostream.write("{0}\n".format(text)) + +def printlog(message, color=None, ostream=sys.stderr): + """Prints a log message to stream. + """ + if has_colors and color in colors: + ostream.write("{0}{1}: {2}{3}\n".format(colors[color], __name__, message, colors['reset'])) + else: + ostream.write("{0}: {1}\n".format(__name__, message)) + +def i(message, ostream=sys.stderr): + """Sends an info log message. + """ + printlog(message, + None, + ostream=ostream) + +def d(message, ostream=sys.stderr): + """Sends a debug log message. + """ + printlog(message, + 'blue' if has_colors else None, + ostream=ostream) + +def w(message, ostream=sys.stderr): + """Sends a warning log message. + """ + printlog(message, + 'yellow' if has_colors else None, + ostream=ostream) + +def e(message, ostream=sys.stderr): + """Sends an error log message. + """ + printlog(message, + 'bold-yellow' if has_colors else None, + ostream=ostream) + +def wtf(message, ostream=sys.stderr): + """What a Terrible Failure. + """ + printlog(message, + 'bold-red' if has_colors else None, + ostream=ostream) diff --git a/src/you_get/util/sogou_proxy.py b/src/you_get/util/sogou_proxy.py new file mode 100644 index 00000000..01ffb572 --- /dev/null +++ b/src/you_get/util/sogou_proxy.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +# Original code from: +# http://xiaoxia.org/2011/03/26/using-python-to-write-a-local-sogou-proxy-server-procedures/ + +from . import log + +from http.client import HTTPResponse +from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn +from threading import Thread +import random, socket, struct, sys, time + +def sogou_proxy_server( + host=("0.0.0.0", 0), + network_env='CERNET', + ostream=sys.stderr): + """Returns a Sogou proxy server object. + """ + + x_sogou_auth = '9CD285F1E7ADB0BD403C22AD1D545F40/30/853edc6d49ba4e27' + proxy_host = 'h0.cnc.bj.ie.sogou.com' + proxy_port = 80 + + def sogou_hash(t, host): + s = (t + host + 'SogouExplorerProxy').encode('ascii') + code = len(s) + dwords = int(len(s) / 4) + rest = len(s) % 4 + v = struct.unpack(str(dwords) + 'i' + str(rest) + 's', s) + for vv in v: + if type(vv) != bytes: + a = (vv & 0xFFFF) + b = (vv >> 16) + code += a + code = code ^ (((code << 5) ^ b) << 0xb) + # To avoid overflows + code &= 0xffffffff + code += code >> 0xb + if rest == 3: + code += s[len(s) - 2] * 256 + s[len(s) - 3] + code = code ^ ((code ^ (s[len(s) - 1]) * 4) << 0x10) + code &= 0xffffffff + code += code >> 0xb + elif rest == 2: + code += (s[len(s) - 1]) * 256 + (s[len(s) - 2]) + code ^= code << 0xb + code &= 0xffffffff + code += code >> 0x11 + elif rest == 1: + code += s[len(s) - 1] + code ^= code << 0xa + code &= 0xffffffff + code += code >> 0x1 + code ^= code * 8 + code &= 0xffffffff + code += code >> 5 + code ^= code << 4 + code = code & 0xffffffff + code += code >> 0x11 + code ^= code << 0x19 + code = code & 0xffffffff + code += code >> 6 + code = code & 0xffffffff + return hex(code)[2:].rstrip('L').zfill(8) + + class Handler(BaseHTTPRequestHandler): + _socket = None + def do_proxy(self): + try: + if self._socket is None: + self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._socket.connect((proxy_host, proxy_port)) + self._socket.send(self.requestline.encode('ascii') + b'\r\n') + log.d(self.requestline, ostream) + + # Add Sogou Verification Tags + self.headers['X-Sogou-Auth'] = x_sogou_auth + t = hex(int(time.time()))[2:].rstrip('L').zfill(8) + self.headers['X-Sogou-Tag'] = sogou_hash(t, self.headers['Host']) + self.headers['X-Sogou-Timestamp'] = t + self._socket.send(str(self.headers).encode('ascii') + b'\r\n') + + # Send POST data + if self.command == 'POST': + self._socket.send(self.rfile.read(int(self.headers['Content-Length']))) + response = HTTPResponse(self._socket, method=self.command) + response.begin() + + # Response + status = 'HTTP/1.1 %s %s' % (response.status, response.reason) + self.wfile.write(status.encode('ascii') + b'\r\n') + h = '' + for hh, vv in response.getheaders(): + if hh.upper() != 'TRANSFER-ENCODING': + h += hh + ': ' + vv + '\r\n' + self.wfile.write(h.encode('ascii') + b'\r\n') + while True: + response_data = response.read(8192) + if len(response_data) == 0: + break + self.wfile.write(response_data) + + except socket.error: + log.e('Socket error for ' + self.requestline, ostream) + + def do_POST(self): + self.do_proxy() + + def do_GET(self): + self.do_proxy() + + class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): + pass + + # Server starts + log.printlog('Sogou Proxy Mini-Server', color='bold-green', ostream=ostream) + + try: + server = ThreadingHTTPServer(host, Handler) + except Exception as ex: + log.wtf("Socket error: %s" % ex, ostream) + exit(1) + host = server.server_address + + if network_env.upper() == 'CERNET': + proxy_host = 'h%s.edu.bj.ie.sogou.com' % random.randint(0, 10) + elif network_env.upper() == 'CTCNET': + proxy_host = 'h%s.ctc.bj.ie.sogou.com' % random.randint(0, 3) + elif network_env.upper() == 'CNCNET': + proxy_host = 'h%s.cnc.bj.ie.sogou.com' % random.randint(0, 3) + elif network_env.upper() == 'DXT': + proxy_host = 'h%s.dxt.bj.ie.sogou.com' % random.randint(0, 10) + else: + proxy_host = 'h%s.edu.bj.ie.sogou.com' % random.randint(0, 10) + + log.i('Remote host: %s' % log.underlined(proxy_host), ostream) + log.i('Proxy server running on %s' % + log.underlined("%s:%s" % host), ostream) + + return server diff --git a/src/you_get/version.py b/src/you_get/version.py index 43c2747b..4e983583 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,5 +1,6 @@ #!/usr/bin/env python __all__ = ['__version__', '__date__'] -__version__ = '0.3.21' -__date__ = '2013-08-17' +__name__ = 'you-get' +__version__ = '0.3.24' +__date__ = '2013-10-30' diff --git a/tests/test.py b/tests/test.py index 641878ef..9ecf4d68 100644 --- a/tests/test.py +++ b/tests/test.py @@ -4,7 +4,7 @@ import unittest from you_get import * -from you_get.downloader.__main__ import url_to_module +from you_get.extractor.__main__ import url_to_module def test_urls(urls): for url in urls: @@ -23,18 +23,20 @@ class YouGetTests(unittest.TestCase): "http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", ]) + def test_ted(self): + test_urls([ + "http://www.ted.com/talks/jennifer_lin_improvs_piano_magic.html", + "http://www.ted.com/talks/derek_paravicini_and_adam_ockelford_in_the_key_of_genius.html", + ]) + def test_vimeo(self): test_urls([ "http://vimeo.com/56810854", ]) - def test_xiami(self): - test_urls([ - "http://www.xiami.com/song/1769835121", - ]) - def test_youtube(self): test_urls([ "http://www.youtube.com/watch?v=pzKerr0JIPA", "http://youtu.be/pzKerr0JIPA", + "http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare" ]) diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 00000000..0b7b0231 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +import unittest + +from you_get.util import * + +class TestUtil(unittest.TestCase): + def test_legitimize(self): + self.assertEqual(legitimize("1*2", os="Linux"), "1*2") + self.assertEqual(legitimize("1*2", os="Darwin"), "1*2") + self.assertEqual(legitimize("1*2", os="Windows"), "1-2") diff --git a/you-get b/you-get index 86b44109..8bdc77c7 100755 --- a/you-get +++ b/you-get @@ -4,7 +4,7 @@ import os, sys __path__ = os.path.dirname(os.path.realpath(__file__)) __srcdir__ = 'src' sys.path.insert(1, os.path.join(__path__, __srcdir__)) -from you_get.downloader import main +from you_get.extractor import main if __name__ == '__main__': main() diff --git a/you-get.json b/you-get.json index 92114cff..dc988868 100644 --- a/you-get.json +++ b/you-get.json @@ -31,6 +31,6 @@ ], "console_scripts": [ - "you-get = you_get.downloader.__main__:main" + "you-get = you_get.extractor.__main__:main" ] }