From d1d02fe7ae3e41133981323f951c3fd540b63e09 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 11 Sep 2013 13:45:34 +0200 Subject: [PATCH 001/147] downloader -> extractor --- src/you_get/__init__.py | 3 ++- src/you_get/{downloader => extractor}/__init__.py | 0 src/you_get/{downloader => extractor}/__main__.py | 2 +- src/you_get/{downloader => extractor}/acfun.py | 0 src/you_get/{downloader => extractor}/alive.py | 0 src/you_get/{downloader => extractor}/baidu.py | 0 src/you_get/{downloader => extractor}/bilibili.py | 0 src/you_get/{downloader => extractor}/blip.py | 0 src/you_get/{downloader => extractor}/cntv.py | 0 src/you_get/{downloader => extractor}/coursera.py | 0 src/you_get/{downloader => extractor}/dailymotion.py | 0 src/you_get/{downloader => extractor}/douban.py | 0 src/you_get/{downloader => extractor}/ehow.py | 0 src/you_get/{downloader => extractor}/facebook.py | 0 src/you_get/{downloader => extractor}/fivesing.py | 0 src/you_get/{downloader => extractor}/freesound.py | 0 src/you_get/{downloader => extractor}/google.py | 0 src/you_get/{downloader => extractor}/ifeng.py | 0 src/you_get/{downloader => extractor}/instagram.py | 0 src/you_get/{downloader => extractor}/iqiyi.py | 0 src/you_get/{downloader => extractor}/joy.py | 0 src/you_get/{downloader => extractor}/khan.py | 0 src/you_get/{downloader => extractor}/ku6.py | 0 src/you_get/{downloader => extractor}/miomio.py | 0 src/you_get/{downloader => extractor}/mixcloud.py | 0 src/you_get/{downloader => extractor}/netease.py | 0 src/you_get/{downloader => extractor}/nicovideo.py | 0 src/you_get/{downloader => extractor}/pptv.py | 0 src/you_get/{downloader => extractor}/qq.py | 0 src/you_get/{downloader => extractor}/sina.py | 0 src/you_get/{downloader => extractor}/sohu.py | 0 src/you_get/{downloader => extractor}/songtaste.py | 0 src/you_get/{downloader => extractor}/soundcloud.py | 0 src/you_get/{downloader => extractor}/ted.py | 0 src/you_get/{downloader => extractor}/tudou.py | 0 src/you_get/{downloader => extractor}/tumblr.py | 0 src/you_get/{downloader => extractor}/vid48.py | 0 src/you_get/{downloader => extractor}/vimeo.py | 0 src/you_get/{downloader => extractor}/vine.py | 0 src/you_get/{downloader => extractor}/w56.py | 0 src/you_get/{downloader => extractor}/xiami.py | 0 src/you_get/{downloader => extractor}/yinyuetai.py | 0 src/you_get/{downloader => extractor}/youku.py | 0 src/you_get/{downloader => extractor}/youtube.py | 0 tests/test.py | 2 +- you-get | 2 +- you-get.json | 2 +- 47 files changed, 6 insertions(+), 5 deletions(-) rename src/you_get/{downloader => extractor}/__init__.py (100%) rename src/you_get/{downloader => extractor}/__main__.py (98%) rename src/you_get/{downloader => extractor}/acfun.py (100%) rename src/you_get/{downloader => extractor}/alive.py (100%) rename src/you_get/{downloader => extractor}/baidu.py (100%) rename src/you_get/{downloader => extractor}/bilibili.py (100%) rename src/you_get/{downloader => extractor}/blip.py (100%) rename src/you_get/{downloader => extractor}/cntv.py (100%) rename src/you_get/{downloader => extractor}/coursera.py (100%) rename src/you_get/{downloader => extractor}/dailymotion.py (100%) rename src/you_get/{downloader => extractor}/douban.py (100%) rename src/you_get/{downloader => extractor}/ehow.py (100%) rename src/you_get/{downloader => extractor}/facebook.py (100%) rename src/you_get/{downloader => extractor}/fivesing.py (100%) rename src/you_get/{downloader => extractor}/freesound.py (100%) rename src/you_get/{downloader => extractor}/google.py (100%) rename src/you_get/{downloader => extractor}/ifeng.py (100%) rename src/you_get/{downloader => extractor}/instagram.py (100%) rename src/you_get/{downloader => extractor}/iqiyi.py (100%) rename src/you_get/{downloader => extractor}/joy.py (100%) rename src/you_get/{downloader => extractor}/khan.py (100%) rename src/you_get/{downloader => extractor}/ku6.py (100%) rename src/you_get/{downloader => extractor}/miomio.py (100%) rename src/you_get/{downloader => extractor}/mixcloud.py (100%) rename src/you_get/{downloader => extractor}/netease.py (100%) rename src/you_get/{downloader => extractor}/nicovideo.py (100%) rename src/you_get/{downloader => extractor}/pptv.py (100%) rename src/you_get/{downloader => extractor}/qq.py (100%) rename src/you_get/{downloader => extractor}/sina.py (100%) rename src/you_get/{downloader => extractor}/sohu.py (100%) rename src/you_get/{downloader => extractor}/songtaste.py (100%) rename src/you_get/{downloader => extractor}/soundcloud.py (100%) rename src/you_get/{downloader => extractor}/ted.py (100%) rename src/you_get/{downloader => extractor}/tudou.py (100%) rename src/you_get/{downloader => extractor}/tumblr.py (100%) rename src/you_get/{downloader => extractor}/vid48.py (100%) rename src/you_get/{downloader => extractor}/vimeo.py (100%) rename src/you_get/{downloader => extractor}/vine.py (100%) rename src/you_get/{downloader => extractor}/w56.py (100%) rename src/you_get/{downloader => extractor}/xiami.py (100%) rename src/you_get/{downloader => extractor}/yinyuetai.py (100%) rename src/you_get/{downloader => extractor}/youku.py (100%) rename src/you_get/{downloader => extractor}/youtube.py (100%) diff --git a/src/you_get/__init__.py b/src/you_get/__init__.py index ecca35d2..7cae2415 100644 --- a/src/you_get/__init__.py +++ b/src/you_get/__init__.py @@ -6,4 +6,5 @@ from .version import * # Easy import #from .cli_wrapper.converter import * #from .cli_wrapper.player import * -from .downloader import * +from .cli_wrapper import * +from .extractor import * diff --git a/src/you_get/downloader/__init__.py b/src/you_get/extractor/__init__.py similarity index 100% rename from src/you_get/downloader/__init__.py rename to src/you_get/extractor/__init__.py diff --git a/src/you_get/downloader/__main__.py b/src/you_get/extractor/__main__.py similarity index 98% rename from src/you_get/downloader/__main__.py rename to src/you_get/extractor/__main__.py index ed07f702..d5d88d04 100644 --- a/src/you_get/downloader/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python __all__ = ['main', 'any_download', 'any_download_playlist'] -from ..downloader import * +from ..extractor import * from ..common import * def url_to_module(url): diff --git a/src/you_get/downloader/acfun.py b/src/you_get/extractor/acfun.py similarity index 100% rename from src/you_get/downloader/acfun.py rename to src/you_get/extractor/acfun.py diff --git a/src/you_get/downloader/alive.py b/src/you_get/extractor/alive.py similarity index 100% rename from src/you_get/downloader/alive.py rename to src/you_get/extractor/alive.py diff --git a/src/you_get/downloader/baidu.py b/src/you_get/extractor/baidu.py similarity index 100% rename from src/you_get/downloader/baidu.py rename to src/you_get/extractor/baidu.py diff --git a/src/you_get/downloader/bilibili.py b/src/you_get/extractor/bilibili.py similarity index 100% rename from src/you_get/downloader/bilibili.py rename to src/you_get/extractor/bilibili.py diff --git a/src/you_get/downloader/blip.py b/src/you_get/extractor/blip.py similarity index 100% rename from src/you_get/downloader/blip.py rename to src/you_get/extractor/blip.py diff --git a/src/you_get/downloader/cntv.py b/src/you_get/extractor/cntv.py similarity index 100% rename from src/you_get/downloader/cntv.py rename to src/you_get/extractor/cntv.py diff --git a/src/you_get/downloader/coursera.py b/src/you_get/extractor/coursera.py similarity index 100% rename from src/you_get/downloader/coursera.py rename to src/you_get/extractor/coursera.py diff --git a/src/you_get/downloader/dailymotion.py b/src/you_get/extractor/dailymotion.py similarity index 100% rename from src/you_get/downloader/dailymotion.py rename to src/you_get/extractor/dailymotion.py diff --git a/src/you_get/downloader/douban.py b/src/you_get/extractor/douban.py similarity index 100% rename from src/you_get/downloader/douban.py rename to src/you_get/extractor/douban.py diff --git a/src/you_get/downloader/ehow.py b/src/you_get/extractor/ehow.py similarity index 100% rename from src/you_get/downloader/ehow.py rename to src/you_get/extractor/ehow.py diff --git a/src/you_get/downloader/facebook.py b/src/you_get/extractor/facebook.py similarity index 100% rename from src/you_get/downloader/facebook.py rename to src/you_get/extractor/facebook.py diff --git a/src/you_get/downloader/fivesing.py b/src/you_get/extractor/fivesing.py similarity index 100% rename from src/you_get/downloader/fivesing.py rename to src/you_get/extractor/fivesing.py diff --git a/src/you_get/downloader/freesound.py b/src/you_get/extractor/freesound.py similarity index 100% rename from src/you_get/downloader/freesound.py rename to src/you_get/extractor/freesound.py diff --git a/src/you_get/downloader/google.py b/src/you_get/extractor/google.py similarity index 100% rename from src/you_get/downloader/google.py rename to src/you_get/extractor/google.py diff --git a/src/you_get/downloader/ifeng.py b/src/you_get/extractor/ifeng.py similarity index 100% rename from src/you_get/downloader/ifeng.py rename to src/you_get/extractor/ifeng.py diff --git a/src/you_get/downloader/instagram.py b/src/you_get/extractor/instagram.py similarity index 100% rename from src/you_get/downloader/instagram.py rename to src/you_get/extractor/instagram.py diff --git a/src/you_get/downloader/iqiyi.py b/src/you_get/extractor/iqiyi.py similarity index 100% rename from src/you_get/downloader/iqiyi.py rename to src/you_get/extractor/iqiyi.py diff --git a/src/you_get/downloader/joy.py b/src/you_get/extractor/joy.py similarity index 100% rename from src/you_get/downloader/joy.py rename to src/you_get/extractor/joy.py diff --git a/src/you_get/downloader/khan.py b/src/you_get/extractor/khan.py similarity index 100% rename from src/you_get/downloader/khan.py rename to src/you_get/extractor/khan.py diff --git a/src/you_get/downloader/ku6.py b/src/you_get/extractor/ku6.py similarity index 100% rename from src/you_get/downloader/ku6.py rename to src/you_get/extractor/ku6.py diff --git a/src/you_get/downloader/miomio.py b/src/you_get/extractor/miomio.py similarity index 100% rename from src/you_get/downloader/miomio.py rename to src/you_get/extractor/miomio.py diff --git a/src/you_get/downloader/mixcloud.py b/src/you_get/extractor/mixcloud.py similarity index 100% rename from src/you_get/downloader/mixcloud.py rename to src/you_get/extractor/mixcloud.py diff --git a/src/you_get/downloader/netease.py b/src/you_get/extractor/netease.py similarity index 100% rename from src/you_get/downloader/netease.py rename to src/you_get/extractor/netease.py diff --git a/src/you_get/downloader/nicovideo.py b/src/you_get/extractor/nicovideo.py similarity index 100% rename from src/you_get/downloader/nicovideo.py rename to src/you_get/extractor/nicovideo.py diff --git a/src/you_get/downloader/pptv.py b/src/you_get/extractor/pptv.py similarity index 100% rename from src/you_get/downloader/pptv.py rename to src/you_get/extractor/pptv.py diff --git a/src/you_get/downloader/qq.py b/src/you_get/extractor/qq.py similarity index 100% rename from src/you_get/downloader/qq.py rename to src/you_get/extractor/qq.py diff --git a/src/you_get/downloader/sina.py b/src/you_get/extractor/sina.py similarity index 100% rename from src/you_get/downloader/sina.py rename to src/you_get/extractor/sina.py diff --git a/src/you_get/downloader/sohu.py b/src/you_get/extractor/sohu.py similarity index 100% rename from src/you_get/downloader/sohu.py rename to src/you_get/extractor/sohu.py diff --git a/src/you_get/downloader/songtaste.py b/src/you_get/extractor/songtaste.py similarity index 100% rename from src/you_get/downloader/songtaste.py rename to src/you_get/extractor/songtaste.py diff --git a/src/you_get/downloader/soundcloud.py b/src/you_get/extractor/soundcloud.py similarity index 100% rename from src/you_get/downloader/soundcloud.py rename to src/you_get/extractor/soundcloud.py diff --git a/src/you_get/downloader/ted.py b/src/you_get/extractor/ted.py similarity index 100% rename from src/you_get/downloader/ted.py rename to src/you_get/extractor/ted.py diff --git a/src/you_get/downloader/tudou.py b/src/you_get/extractor/tudou.py similarity index 100% rename from src/you_get/downloader/tudou.py rename to src/you_get/extractor/tudou.py diff --git a/src/you_get/downloader/tumblr.py b/src/you_get/extractor/tumblr.py similarity index 100% rename from src/you_get/downloader/tumblr.py rename to src/you_get/extractor/tumblr.py diff --git a/src/you_get/downloader/vid48.py b/src/you_get/extractor/vid48.py similarity index 100% rename from src/you_get/downloader/vid48.py rename to src/you_get/extractor/vid48.py diff --git a/src/you_get/downloader/vimeo.py b/src/you_get/extractor/vimeo.py similarity index 100% rename from src/you_get/downloader/vimeo.py rename to src/you_get/extractor/vimeo.py diff --git a/src/you_get/downloader/vine.py b/src/you_get/extractor/vine.py similarity index 100% rename from src/you_get/downloader/vine.py rename to src/you_get/extractor/vine.py diff --git a/src/you_get/downloader/w56.py b/src/you_get/extractor/w56.py similarity index 100% rename from src/you_get/downloader/w56.py rename to src/you_get/extractor/w56.py diff --git a/src/you_get/downloader/xiami.py b/src/you_get/extractor/xiami.py similarity index 100% rename from src/you_get/downloader/xiami.py rename to src/you_get/extractor/xiami.py diff --git a/src/you_get/downloader/yinyuetai.py b/src/you_get/extractor/yinyuetai.py similarity index 100% rename from src/you_get/downloader/yinyuetai.py rename to src/you_get/extractor/yinyuetai.py diff --git a/src/you_get/downloader/youku.py b/src/you_get/extractor/youku.py similarity index 100% rename from src/you_get/downloader/youku.py rename to src/you_get/extractor/youku.py diff --git a/src/you_get/downloader/youtube.py b/src/you_get/extractor/youtube.py similarity index 100% rename from src/you_get/downloader/youtube.py rename to src/you_get/extractor/youtube.py diff --git a/tests/test.py b/tests/test.py index 641878ef..5d21da2d 100644 --- a/tests/test.py +++ b/tests/test.py @@ -4,7 +4,7 @@ import unittest from you_get import * -from you_get.downloader.__main__ import url_to_module +from you_get.extractor.__main__ import url_to_module def test_urls(urls): for url in urls: diff --git a/you-get b/you-get index 86b44109..8bdc77c7 100755 --- a/you-get +++ b/you-get @@ -4,7 +4,7 @@ import os, sys __path__ = os.path.dirname(os.path.realpath(__file__)) __srcdir__ = 'src' sys.path.insert(1, os.path.join(__path__, __srcdir__)) -from you_get.downloader import main +from you_get.extractor import main if __name__ == '__main__': main() diff --git a/you-get.json b/you-get.json index 92114cff..dc988868 100644 --- a/you-get.json +++ b/you-get.json @@ -31,6 +31,6 @@ ], "console_scripts": [ - "you-get = you_get.downloader.__main__:main" + "you-get = you_get.extractor.__main__:main" ] } From 610365a37ca3a3f46cd6da67fa4a110e317513cb Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 11 Sep 2013 13:50:39 +0200 Subject: [PATCH 002/147] cli_wrapper --- src/you_get/__init__.py | 3 --- src/you_get/cli_wrapper/__init__.py | 0 src/you_get/cli_wrapper/downloader/__init__.py | 0 src/you_get/cli_wrapper/openssl/__init__.py | 0 src/you_get/cli_wrapper/player/__init__.py | 3 +++ src/you_get/cli_wrapper/player/__main__.py | 7 +++++++ src/you_get/cli_wrapper/player/dragonplayer.py | 0 src/you_get/cli_wrapper/player/gnome_mplayer.py | 0 src/you_get/cli_wrapper/player/mplayer.py | 0 src/you_get/cli_wrapper/player/vlc.py | 1 + src/you_get/cli_wrapper/player/wmp.py | 0 src/you_get/cli_wrapper/transcoder/__init__.py | 0 src/you_get/cli_wrapper/transcoder/ffmpeg.py | 0 src/you_get/cli_wrapper/transcoder/libav.py | 0 src/you_get/cli_wrapper/transcoder/mencoder.py | 0 15 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 src/you_get/cli_wrapper/__init__.py create mode 100644 src/you_get/cli_wrapper/downloader/__init__.py create mode 100644 src/you_get/cli_wrapper/openssl/__init__.py create mode 100644 src/you_get/cli_wrapper/player/__init__.py create mode 100644 src/you_get/cli_wrapper/player/__main__.py create mode 100644 src/you_get/cli_wrapper/player/dragonplayer.py create mode 100644 src/you_get/cli_wrapper/player/gnome_mplayer.py create mode 100644 src/you_get/cli_wrapper/player/mplayer.py create mode 100644 src/you_get/cli_wrapper/player/vlc.py create mode 100644 src/you_get/cli_wrapper/player/wmp.py create mode 100644 src/you_get/cli_wrapper/transcoder/__init__.py create mode 100644 src/you_get/cli_wrapper/transcoder/ffmpeg.py create mode 100644 src/you_get/cli_wrapper/transcoder/libav.py create mode 100644 src/you_get/cli_wrapper/transcoder/mencoder.py diff --git a/src/you_get/__init__.py b/src/you_get/__init__.py index 7cae2415..f8ee6011 100644 --- a/src/you_get/__init__.py +++ b/src/you_get/__init__.py @@ -3,8 +3,5 @@ from .common import * from .version import * -# Easy import -#from .cli_wrapper.converter import * -#from .cli_wrapper.player import * from .cli_wrapper import * from .extractor import * diff --git a/src/you_get/cli_wrapper/__init__.py b/src/you_get/cli_wrapper/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/downloader/__init__.py b/src/you_get/cli_wrapper/downloader/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/openssl/__init__.py b/src/you_get/cli_wrapper/openssl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/__init__.py b/src/you_get/cli_wrapper/player/__init__.py new file mode 100644 index 00000000..2f7636de --- /dev/null +++ b/src/you_get/cli_wrapper/player/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python + +from .mplayer import * diff --git a/src/you_get/cli_wrapper/player/__main__.py b/src/you_get/cli_wrapper/player/__main__.py new file mode 100644 index 00000000..8d4958b9 --- /dev/null +++ b/src/you_get/cli_wrapper/player/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +def main(): + script_main('you-get', any_download, any_download_playlist) + +if __name__ == "__main__": + main() diff --git a/src/you_get/cli_wrapper/player/dragonplayer.py b/src/you_get/cli_wrapper/player/dragonplayer.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/gnome_mplayer.py b/src/you_get/cli_wrapper/player/gnome_mplayer.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/mplayer.py b/src/you_get/cli_wrapper/player/mplayer.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/player/vlc.py b/src/you_get/cli_wrapper/player/vlc.py new file mode 100644 index 00000000..4265cc3e --- /dev/null +++ b/src/you_get/cli_wrapper/player/vlc.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/src/you_get/cli_wrapper/player/wmp.py b/src/you_get/cli_wrapper/player/wmp.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/__init__.py b/src/you_get/cli_wrapper/transcoder/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/ffmpeg.py b/src/you_get/cli_wrapper/transcoder/ffmpeg.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/libav.py b/src/you_get/cli_wrapper/transcoder/libav.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/cli_wrapper/transcoder/mencoder.py b/src/you_get/cli_wrapper/transcoder/mencoder.py new file mode 100644 index 00000000..e69de29b From 2714ad86ef78be1abdbc8b3ece9a088fdbda93f9 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 12 Sep 2013 23:21:15 +0200 Subject: [PATCH 003/147] JPopsuki.tv is back --- src/you_get/extractor/__init__.py | 1 + src/you_get/extractor/__main__.py | 1 + src/you_get/extractor/jpopsuki.py | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 src/you_get/extractor/jpopsuki.py diff --git a/src/you_get/extractor/__init__.py b/src/you_get/extractor/__init__.py index 99e331f4..874824fe 100644 --- a/src/you_get/extractor/__init__.py +++ b/src/you_get/extractor/__init__.py @@ -18,6 +18,7 @@ from .ifeng import * from .instagram import * from .iqiyi import * from .joy import * +from .jpopsuki import * from .ku6 import * from .miomio import * from .mixcloud import * diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index d5d88d04..ed7d5c10 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -38,6 +38,7 @@ def url_to_module(url): 'instagram': instagram, 'iqiyi': iqiyi, 'joy': joy, + 'jpopsuki': jpopsuki, 'kankanews': bilibili, 'ku6': ku6, 'miomio': miomio, diff --git a/src/you_get/extractor/jpopsuki.py b/src/you_get/extractor/jpopsuki.py new file mode 100644 index 00000000..cf4ec052 --- /dev/null +++ b/src/you_get/extractor/jpopsuki.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +__all__ = ['jpopsuki_download'] + +from ..common import * + +def jpopsuki_download(url, output_dir='.', merge=True, info_only=False): + html = get_html(url, faker=True) + + title = r1(r' Date: Fri, 13 Sep 2013 00:52:43 +0200 Subject: [PATCH 004/147] JPopsuki.tv is back --- README.md | 2 ++ README.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 52d5d30c..001fd4b4 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ Fork me on GitHub: * SoundCloud * Mixcloud * Freesound +* JPopsuki * VID48 * Niconico (ニコニコ動画) * Youku (优酷) @@ -251,6 +252,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * SoundCloud * Mixcloud * Freesound +* JPopsuki * VID48 * NICONICO动画 * 优酷 diff --git a/README.txt b/README.txt index 7bd8a1ee..74cedfd3 100644 --- a/README.txt +++ b/README.txt @@ -32,6 +32,7 @@ Supported Sites (As of Now) * SoundCloud http://soundcloud.com * Mixcloud http://www.mixcloud.com * Freesound http://www.freesound.org +* JPopsuki http://jpopsuki.tv * VID48 http://vid48.com * Niconico (ニコニコ動画) http://www.nicovideo.jp * Youku (优酷) http://www.youku.com From 4d93cc3dc5c0cba5b4e4f95c7dbeccbb4d963de6 Mon Sep 17 00:00:00 2001 From: gongqijian Date: Sat, 21 Sep 2013 00:16:59 +0800 Subject: [PATCH 005/147] Baidu: fix #242 --- src/you_get/extractor/baidu.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractor/baidu.py b/src/you_get/extractor/baidu.py index 79d7053d..10127aeb 100755 --- a/src/you_get/extractor/baidu.py +++ b/src/you_get/extractor/baidu.py @@ -12,16 +12,16 @@ def baidu_get_song_html(sid): return get_html('http://music.baidu.com/song/%s/download?__o=%%2Fsong%%2F%s' % (sid, sid), faker = True) def baidu_get_song_url(html): - return r1(r'downlink="/data/music/file\?link=(.+?)"', html) + return r1(r'href="/data/music/file\?link=(http.+?)"', html) def baidu_get_song_artist(html): - return r1(r'singer_name:"(.+?)"', html) + return r1(r'singer_name\s*:\s*"(.+?)"', html) def baidu_get_song_album(html): - return r1(r'ablum_name:"(.+?)"', html) + return r1(r'album_name\s*:\s*"(.+?)"', html) def baidu_get_song_title(html): - return r1(r'song_title:"(.+?)"', html) + return r1(r'song_title\s*:\s*"(.+?)"', html) def baidu_download_lyric(sid, file_name, output_dir): if common.dry_run: @@ -41,6 +41,8 @@ def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False): title = baidu_get_song_title(html) artist = baidu_get_song_artist(html) album = baidu_get_song_album(html) + assert url + type, ext, size = url_info(url, faker = True) print_info(site_info, title, type, size) if not info_only: From ced8a92873dc7e6156411913a9857118bf1eea97 Mon Sep 17 00:00:00 2001 From: Star Brilliant Date: Sat, 21 Sep 2013 09:57:29 +0800 Subject: [PATCH 006/147] Use decompressobj for zlib decompressing This solves issue #224 and #238 --- src/you_get/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 8faf907a..0cb9c337 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -146,7 +146,8 @@ def undeflate(data): (the zlib compression is used.) """ import zlib - return zlib.decompress(data, -zlib.MAX_WBITS) + decompressobj = zlib.decompressobj(-zlib.MAX_WBITS) + return decompressobj.decompress(data)+decompressobj.flush() # DEPRECATED in favor of get_content() def get_response(url, faker = False): From 47753422acbb0578c1ec9328618000225c70f64a Mon Sep 17 00:00:00 2001 From: nagev Date: Fri, 20 Sep 2013 05:09:27 +0100 Subject: [PATCH 007/147] Update youtube.py No longer works on VEVO encoded videos such as http://www.youtube.com/watch?v=3O1_3zBUKM8 This more general regular expression fixes it. --- src/you_get/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 51fbb07f..1b766cc2 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -45,7 +45,7 @@ def decipher(js, s): code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code) return code - f1 = match1(js, r'g.sig\|\|(\w+)\(g.s\)') + f1 = match1(js, r'\w+\.sig\|\|(\w+)\(\w+\.\w+\)') f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1) code = tr_js(f1def) f2 = match1(f1def, r'(\w+)\(\w+,\d+\)') From b6868cbc12e1c05e7101244080c0422811e48972 Mon Sep 17 00:00:00 2001 From: Fwolf Date: Tue, 17 Sep 2013 21:42:46 +0800 Subject: [PATCH 008/147] Sohu: Fix vid get error --- src/you_get/extractor/sohu.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractor/sohu.py b/src/you_get/extractor/sohu.py index 4400836a..dcaac7eb 100644 --- a/src/you_get/extractor/sohu.py +++ b/src/you_get/extractor/sohu.py @@ -12,8 +12,11 @@ def real_url(host, prot, file, new): return '%s%s?key=%s' % (start[:-1], new, key) def sohu_download(url, output_dir = '.', merge = True, info_only = False): - vid = r1('vid\s*=\s*"(\d+)"', get_html(url)) - + html = get_html(url) + vid = r1('vid\s*=\s*"(\d+)"', html) + if not vid: + vid = r1('vid\s*:\s*"(\d+)"', html) + if vid: data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: @@ -31,8 +34,9 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): for file, new in zip(data['clipsURL'], data['su']): urls.append(real_url(host, prot, file, new)) assert data['clipsURL'][0].endswith('.mp4') - + else: + # my.tv link doesn't include clip info anymore, below block is useless vid = r1('vid\s*=\s*\'(\d+)\'', get_html(url)) data = json.loads(get_decoded_html('http://my.tv.sohu.com/videinfo.jhtml?m=viewnew&vid=%s' % vid)) host = data['allot'] @@ -45,7 +49,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): for file, new in zip(data['clipsURL'], data['su']): urls.append(real_url(host, prot, file, new)) assert data['clipsURL'][0].endswith('.mp4') - + print_info(site_info, title, 'mp4', size) if not info_only: download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge) From aae2a6cfdc916c38349ecfdadb1085575292047d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 21 Sep 2013 14:26:01 +0200 Subject: [PATCH 009/147] Baidu: fix download link from outside mainland China --- src/you_get/extractor/baidu.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractor/baidu.py b/src/you_get/extractor/baidu.py index 10127aeb..54b94ec4 100755 --- a/src/you_get/extractor/baidu.py +++ b/src/you_get/extractor/baidu.py @@ -36,13 +36,18 @@ def baidu_download_lyric(sid, file_name, output_dir): x.write(lrc) def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False): - html = baidu_get_song_html(sid) - url = baidu_get_song_url(html) - title = baidu_get_song_title(html) - artist = baidu_get_song_artist(html) - album = baidu_get_song_album(html) - assert url - + try: + html = baidu_get_song_html(sid) + url = baidu_get_song_url(html) + title = baidu_get_song_title(html) + artist = baidu_get_song_artist(html) + album = baidu_get_song_album(html) + assert url + except: + html = get_html("http://music.baidu.com/song/%s" % sid) + url = r1(r'download_url="([^"]+)"', html) + title = r1(r'sname="([^"]+)"', html) + type, ext, size = url_info(url, faker = True) print_info(site_info, title, type, size) if not info_only: From 36be10805dc61903105dc3d1cd6322581092b0b9 Mon Sep 17 00:00:00 2001 From: Star Brilliant Date: Mon, 30 Sep 2013 22:59:28 +0800 Subject: [PATCH 010/147] Bilibili started to use ssl.bilibili.tv instead of secure.bilibili.tv now --- src/you_get/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index 8512d362..322b60fc 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -83,7 +83,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars t, id = flashvars.split('=', 1) id = id.split('&')[0] From f11d819f9e72f07ff7ccabab1adf9605136adf64 Mon Sep 17 00:00:00 2001 From: gongqijian Date: Fri, 4 Oct 2013 21:15:04 +0800 Subject: [PATCH 011/147] Update baidu.py --- src/you_get/extractor/baidu.py | 111 ++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 42 deletions(-) diff --git a/src/you_get/extractor/baidu.py b/src/you_get/extractor/baidu.py index 54b94ec4..b93b0333 100755 --- a/src/you_get/extractor/baidu.py +++ b/src/you_get/extractor/baidu.py @@ -8,52 +8,71 @@ from .. import common from urllib import parse -def baidu_get_song_html(sid): - return get_html('http://music.baidu.com/song/%s/download?__o=%%2Fsong%%2F%s' % (sid, sid), faker = True) +def baidu_get_song_data(sid): + data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data'] -def baidu_get_song_url(html): - return r1(r'href="/data/music/file\?link=(http.+?)"', html) + if data['xcode'] != '': + # inside china mainland + return data['songList'][0] + else: + # outside china mainland + html = get_html("http://music.baidu.com/song/%s" % sid) -def baidu_get_song_artist(html): - return r1(r'singer_name\s*:\s*"(.+?)"', html) + # baidu pan link + sourceLink = r1(r'"link-src-info">', html) - if href: - lrc = get_html('http://music.baidu.com' + href) - if len(lrc) > 0: - with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w') as x: - x.write(lrc) +def baidu_get_song_artist(data): + return data['artistName'] + +def baidu_get_song_album(data): + return data['albumName'] + +def baidu_get_song_title(data): + return data['songName'] + +def baidu_get_song_lyric(data): + lrc = data['lrcLink'] + return None if lrc is '' else "http://music.baidu.com%s" % lrc def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False): - try: - html = baidu_get_song_html(sid) - url = baidu_get_song_url(html) - title = baidu_get_song_title(html) - artist = baidu_get_song_artist(html) - album = baidu_get_song_album(html) - assert url - except: - html = get_html("http://music.baidu.com/song/%s" % sid) - url = r1(r'download_url="([^"]+)"', html) - title = r1(r'sname="([^"]+)"', html) - + data = baidu_get_song_data(sid) + url = baidu_get_song_url(data) + title = baidu_get_song_title(data) + artist = baidu_get_song_artist(data) + album = baidu_get_song_album(data) + lrc = baidu_get_song_lyric(data) + + assert url + file_name = "%s - %s - %s" % (title, album, artist) + type, ext, size = url_info(url, faker = True) print_info(site_info, title, type, size) if not info_only: - file_name = "%s - %s - %s" % (title, album, artist) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) - baidu_download_lyric(sid, file_name, output_dir) + + if lrc: + type, ext, size = url_info(lrc, faker = True) + print_info(site_info, title, type, size) + if not info_only: + download_urls([lrc], file_name, ext, size, output_dir, faker = True) def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False): html = get_html('http://music.baidu.com/album/%s' % aid, faker = True) @@ -63,32 +82,40 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False) ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids'] track_nr = 1 for id in ids: - song_html = baidu_get_song_html(id) - song_url = baidu_get_song_url(song_html) - song_title = baidu_get_song_title(song_html) + song_data = baidu_get_song_data(id) + song_url = baidu_get_song_url(song_data) + song_title = baidu_get_song_title(song_data) + song_lrc = baidu_get_song_lyric(song_data) file_name = '%02d.%s' % (track_nr, song_title) + type, ext, size = url_info(song_url, faker = True) print_info(site_info, song_title, type, size) if not info_only: download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True) - baidu_download_lyric(id, file_name, output_dir) + + if song_lrc: + type, ext, size = url_info(song_lrc, faker = True) + print_info(site_info, song_title, type, size) + if not info_only: + download_urls([song_lrc], file_name, ext, size, output_dir, faker = True) + track_nr += 1 def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): if re.match(r'http://pan.baidu.com', url): html = get_html(url) - + title = r1(r'server_filename="([^"]+)"', html) if len(title.split('.')) > 1: title = ".".join(title.split('.')[:-1]) - + real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/') type, ext, size = url_info(real_url, faker = True) - + print_info(site_info, title, ext, size) if not info_only: download_urls([real_url], title, ext, size, output_dir, merge = merge) - + elif re.match(r'http://music.baidu.com/album/\d+', url): id = r1(r'http://music.baidu.com/album/(\d+)', url) baidu_download_album(id, output_dir, merge, info_only) From f412a9e9c57ca3849a575b9504a938e7785151f9 Mon Sep 17 00:00:00 2001 From: "Lee, Donggu" Date: Tue, 8 Oct 2013 14:00:00 +0000 Subject: [PATCH 012/147] fix length check --- src/you_get/extractor/tudou.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/tudou.py b/src/you_get/extractor/tudou.py index f28da697..31e90d85 100644 --- a/src/you_get/extractor/tudou.py +++ b/src/you_get/extractor/tudou.py @@ -8,7 +8,7 @@ def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) vids = [] for k in data: - if len(data[k]) == 1: + if len(data[k]) > 0: vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]}) temp = max(vids, key=lambda x:x["size"]) From f1ed97c500ef566714640c00369f801e7c01e22d Mon Sep 17 00:00:00 2001 From: "Lee, Donggu" Date: Tue, 8 Oct 2013 14:00:00 +0000 Subject: [PATCH 013/147] flash link can be downloaded --- src/you_get/extractor/sohu.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractor/sohu.py b/src/you_get/extractor/sohu.py index dcaac7eb..c364917f 100644 --- a/src/you_get/extractor/sohu.py +++ b/src/you_get/extractor/sohu.py @@ -36,9 +36,11 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): assert data['clipsURL'][0].endswith('.mp4') else: - # my.tv link doesn't include clip info anymore, below block is useless - vid = r1('vid\s*=\s*\'(\d+)\'', get_html(url)) - data = json.loads(get_decoded_html('http://my.tv.sohu.com/videinfo.jhtml?m=viewnew&vid=%s' % vid)) + if re.match(r'http://share.vrs.sohu.com', url): + vid = r1('id=(\d+)', url) + else: + vid = r1('vid\s*=\s*\'(\d+)\'', get_html(url)) + data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) host = data['allot'] prot = data['prot'] urls = [] From 3966bcf9cb9146ceda9223c99368b1bd3ee28401 Mon Sep 17 00:00:00 2001 From: "Lee, Donggu" Date: Tue, 8 Oct 2013 14:00:00 +0000 Subject: [PATCH 014/147] removed a filename of the first letter is a period on unix system --- src/you_get/common.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 0cb9c337..b78ae0e3 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -106,11 +106,7 @@ def filenameable(text): 0: None, ord('/'): '-', }) - if platform.system() == 'Darwin': # For Mac OS - text = text.translate({ - ord(':'): '-', - }) - elif platform.system() == 'Windows': # For Windows + if platform.system() == 'Windows': # For Windows text = text.translate({ ord(':'): '-', ord('*'): '-', @@ -124,6 +120,13 @@ def filenameable(text): ord('['): '(', ord(']'): ')', }) + else: + if text.startswith("."): + text = text[1:] + if platform.system() == 'Darwin': # For Mac OS + text = text.translate({ + ord(':'): '-', + }) return text def unescape_html(html): From 9393e770acf04bdb25f9de349a8f8642570dc564 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 8 Oct 2013 14:35:59 +0200 Subject: [PATCH 015/147] remove test_xiami because it fails CI build too often --- tests/test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test.py b/tests/test.py index 5d21da2d..76b39ac5 100644 --- a/tests/test.py +++ b/tests/test.py @@ -28,11 +28,6 @@ class YouGetTests(unittest.TestCase): "http://vimeo.com/56810854", ]) - def test_xiami(self): - test_urls([ - "http://www.xiami.com/song/1769835121", - ]) - def test_youtube(self): test_urls([ "http://www.youtube.com/watch?v=pzKerr0JIPA", From 557926e5151c5f2f5d75eeb6c612e5915a75e0b3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 8 Oct 2013 14:43:03 +0200 Subject: [PATCH 016/147] add version badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 001fd4b4..e8b0b775 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # You-Get -[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) +[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [You-Get](https://github.com/soimort/you-get) is a video downloader runs on Python 3. It aims at easing the download of videos on [YouTube](http://www.youtube.com), [Youku](http://www.youku.com)/[Tudou](http://www.tudou.com) (biggest online video providers in China), [ Niconico](http://www.nicovideo.jp), etc., in one script. From e7ce66c5655f8320cf8dfda1266ac505454cdaf3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Oct 2013 06:16:06 +0200 Subject: [PATCH 017/147] PPTV: fix multiple segments --- src/you_get/extractor/pptv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/pptv.py b/src/you_get/extractor/pptv.py index 4fd88e5b..b1d45edf 100644 --- a/src/you_get/extractor/pptv.py +++ b/src/you_get/extractor/pptv.py @@ -14,7 +14,7 @@ def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o key = r1(r']+>([^<>]+)', xml) rid = r1(r'rid="([^"]+)"', xml) title = r1(r'nm="([^"]+)"', xml) - pieces = re.findall(']+fs="(\d+)"', xml) numbers, fs = zip(*pieces) urls = ['http://%s/%s/%s?k=%s' % (host, i, rid, key) for i in numbers] total_size = sum(map(int, fs)) From acfd3b4e2a5a85a1c50e60c8d1e363ea2b61698d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Oct 2013 14:29:19 +0200 Subject: [PATCH 018/147] TED: fix #256 --- src/you_get/extractor/ted.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor/ted.py b/src/you_get/extractor/ted.py index 167da2a8..23a7054c 100644 --- a/src/you_get/extractor/ted.py +++ b/src/you_get/extractor/ted.py @@ -11,7 +11,7 @@ def ted_download(url, output_dir = '.', merge = True, info_only = False): title = line.replace("", "").replace("", "").replace("\t", "") title = title[:title.find(' | ')] if line.find("no-flash-video-download") > -1: - url = line.replace(' Date: Fri, 18 Oct 2013 14:31:08 +0200 Subject: [PATCH 019/147] add tests for TED --- tests/test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test.py b/tests/test.py index 76b39ac5..452fbcd8 100644 --- a/tests/test.py +++ b/tests/test.py @@ -23,6 +23,12 @@ class YouGetTests(unittest.TestCase): "http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", ]) + def test_ted(self): + test_urls([ + "http://www.ted.com/talks/jennifer_lin_improvs_piano_magic.html", + "http://www.ted.com/talks/derek_paravicini_and_adam_ockelford_in_the_key_of_genius.html", + ]) + def test_vimeo(self): test_urls([ "http://vimeo.com/56810854", From bf5d2dfbcea26922e37108e0e28b77490576b5a8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Oct 2013 14:49:29 +0200 Subject: [PATCH 020/147] YouTube: fix #252 --- src/you_get/common.py | 5 ++++- src/you_get/extractor/youtube.py | 4 +++- tests/test.py | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index b78ae0e3..ceb07646 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -85,7 +85,10 @@ def parse_query_param(url, param): The value of the parameter. """ - return parse.parse_qs(parse.urlparse(url).query)[param][0] + try: + return parse.parse_qs(parse.urlparse(url).query)[param][0] + except: + return None def unicodize(text): return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 1b766cc2..96f6987b 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -107,7 +107,9 @@ def youtube_download(url, output_dir='.', merge=True, info_only=False): """Downloads YouTube videos by URL. """ - id = match1(url, r'youtu.be/([^/]+)') or parse_query_param(url, 'v') + id = match1(url, r'youtu.be/([^/]+)') or \ + parse_query_param(url, 'v') or \ + parse_query_param(parse_query_param(url, 'u'), 'v') assert id youtube_download_by_id(id, title=None, output_dir=output_dir, merge=merge, info_only=info_only) diff --git a/tests/test.py b/tests/test.py index 452fbcd8..9ecf4d68 100644 --- a/tests/test.py +++ b/tests/test.py @@ -38,4 +38,5 @@ class YouGetTests(unittest.TestCase): test_urls([ "http://www.youtube.com/watch?v=pzKerr0JIPA", "http://youtu.be/pzKerr0JIPA", + "http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare" ]) From 8b5ae3a60940472d26c00eadc6eacb58bc6adb73 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Oct 2013 15:03:01 +0200 Subject: [PATCH 021/147] version 0.3.22 --- CHANGELOG.txt | 17 +++++++++++++++++ README.txt | 2 ++ src/you_get/version.py | 4 ++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 67cbb1fb..fca035b8 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,23 @@ Changelog ========= +0.3.22 +------ + +*Date: 2013-10-18* + +* Fix issues for: + - Baidu + - Bilibili + - JPopsuki TV + - Niconico + - PPTV + - TED + - Tumblr + - YinYueTai + - YouTube + - ... + 0.3.21 ------ diff --git a/README.txt b/README.txt index 74cedfd3..c4b68af8 100644 --- a/README.txt +++ b/README.txt @@ -3,6 +3,8 @@ You-Get .. image:: https://api.travis-ci.org/soimort/you-get.png +.. image:: https://badge.fury.io/py/you-get.png + `You-Get `_ is a video downloader runs on Python 3. It aims at easing the download of videos on `YouTube `_, `Youku `_/`Tudou `_ (biggest online video providers in China), `Niconico `_, etc., in one script. See the project homepage http://www.soimort.org/you-get for further documentation. diff --git a/src/you_get/version.py b/src/you_get/version.py index 43c2747b..bf14f9c8 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,5 +1,5 @@ #!/usr/bin/env python __all__ = ['__version__', '__date__'] -__version__ = '0.3.21' -__date__ = '2013-08-17' +__version__ = '0.3.22' +__date__ = '2013-10-18' From bfc85f8a49ee1eec0cf743664c3741d3303a7632 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Oct 2013 23:03:56 +0200 Subject: [PATCH 022/147] Miomio: add Sina as video source --- src/you_get/extractor/miomio.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor/miomio.py b/src/you_get/extractor/miomio.py index cbdad765..bac1f64c 100644 --- a/src/you_get/extractor/miomio.py +++ b/src/you_get/extractor/miomio.py @@ -4,6 +4,7 @@ __all__ = ['miomio_download'] from ..common import * +from .sina import sina_download_by_vid from .tudou import tudou_download_by_id from .youku import youku_download_by_id @@ -16,9 +17,11 @@ def miomio_download(url, output_dir = '.', merge = True, info_only = False): t = r1(r'type=(\w+)', flashvars) id = r1(r'vid=([^"]+)', flashvars) if t == 'youku': - youku_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'tudou': - tudou_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + tudou_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) + elif t == 'sina': + sina_download_by_vid(id, title, output_dir=output_dir, merge=merge, info_only=info_only) else: raise NotImplementedError(flashvars) From 1b75cfcc473116d787e4f0908e58a020b90d8768 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 18 Oct 2013 23:38:24 +0200 Subject: [PATCH 023/147] Sina: fix #246 --- src/you_get/extractor/sina.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/sina.py b/src/you_get/extractor/sina.py index af030a9e..33cc0c7c 100644 --- a/src/you_get/extractor/sina.py +++ b/src/you_get/extractor/sina.py @@ -5,7 +5,7 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] from ..common import * def video_info(id): - xml = get_content('http://v.iask.com/v_play.php?vid=%s' % id, decoded=True) + xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, decoded=True) urls = re.findall(r'(?:)?', xml) name = match1(xml, r'(?:)?') vstr = match1(xml, r'(?:)?') From d4855581a60021a84a7b63f4666d70185d6d64e7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 23 Oct 2013 05:32:53 +0200 Subject: [PATCH 024/147] I can't believe you-get didn't download YouTube playlists (fix #35) --- src/you_get/extractor/youtube.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 96f6987b..9ff08e07 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -103,6 +103,15 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) +def youtube_list_download_by_id(list_id, title=None, output_dir='.', merge=True, info_only=False): + """Downloads a YouTube video list by its unique id. + """ + + video_page = get_content('http://www.youtube.com/playlist?list=%s' % list_id) + ids = set(re.findall(r' Date: Wed, 23 Oct 2013 06:11:09 +0200 Subject: [PATCH 025/147] support general short URLs, fix #162 --- src/you_get/extractor/__main__.py | 33 +++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index ed7d5c10..f8d9ecea 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -5,14 +5,13 @@ from ..extractor import * from ..common import * def url_to_module(url): - site = r1(r'http://([^/]+)/', url) - assert site, 'invalid url: ' + url + video_host = r1(r'http://([^/]+)/', url) + video_url = r1(r'http://[^/]+(.*)', url) + assert video_host and video_url, 'invalid url: ' + url - if site.endswith('.com.cn'): - site = site[:-3] - domain = r1(r'(\.[^.]+\.[^.]+)$', site) - if not domain: - domain = site + if video_host.endswith('.com.cn'): + video_host = video_host[:-3] + domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host assert domain, 'unsupported url: ' + url k = r1(r'([^.]+)', domain) @@ -68,14 +67,28 @@ def url_to_module(url): if k in downloads: return downloads[k] else: - raise NotImplementedError(url) + import http.client + conn = http.client.HTTPConnection(video_host) + conn.request("HEAD", video_url) + res = conn.getresponse() + location = res.getheader('location') + if location is None: + raise NotImplementedError(url) + else: + return url_to_module(location), location def any_download(url, output_dir = '.', merge = True, info_only = False): - m = url_to_module(url) + try: + m, url = url_to_module(url) + except: + m = url_to_module(url) m.download(url, output_dir = output_dir, merge = merge, info_only = info_only) def any_download_playlist(url, output_dir = '.', merge = True, info_only = False): - m = url_to_module(url) + try: + m, url = url_to_module(url) + except: + m = url_to_module(url) m.download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only) def main(): From 5afdf2661ae7c79a58e7b887a4a52c305f466992 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 23 Oct 2013 06:22:43 +0200 Subject: [PATCH 026/147] version 0.3.23 --- CHANGELOG.txt | 10 ++++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index fca035b8..c4d7da6c 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,16 @@ Changelog ========= +0.3.23 +------ + +*Date: 2013-10-23* + +* Support YouTube playlists +* Support general short URLs +* Fix issues for: + - Sina + 0.3.22 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index bf14f9c8..2295fbec 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,5 +1,5 @@ #!/usr/bin/env python __all__ = ['__version__', '__date__'] -__version__ = '0.3.22' -__date__ = '2013-10-18' +__version__ = '0.3.23' +__date__ = '2013-10-23' From b5dc362a5290a9889c181325c27ad1dd51b230aa Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 23 Oct 2013 18:32:56 +0200 Subject: [PATCH 027/147] Vimeo: fix #259 --- src/you_get/extractor/vimeo.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/you_get/extractor/vimeo.py b/src/you_get/extractor/vimeo.py index 1a18dfbb..60611f74 100644 --- a/src/you_get/extractor/vimeo.py +++ b/src/you_get/extractor/vimeo.py @@ -5,19 +5,16 @@ __all__ = ['vimeo_download', 'vimeo_download_by_id'] from ..common import * def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - html = get_html('http://vimeo.com/%s' % id, faker = True) + video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) + title = r1(r'([^<]+)', video_page) + info = dict(re.findall(r'"([^"]+)":\{[^{]+"url":"([^"]+)"', video_page)) + for quality in ['hd', 'sd', 'mobile']: + if quality in info: + url = info[quality] + break + assert url - signature = r1(r'"signature":"([^"]+)"', html) - timestamp = r1(r'"timestamp":([^,]+)', html) - hd = r1(r',"hd":(\d+),', html) - - title = r1(r'"title":"([^"]+)"', html) - title = escape_file_path(title) - - url = 'http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s' % (id, signature, timestamp) - if hd == "1": - url += '&quality=hd' - type, ext, size = url_info(url, faker = True) + type, ext, size = url_info(url, faker=True) print_info(site_info, title, type, size) if not info_only: From fea0b658efd185b853e778598b2c9550b4292d29 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 29 Oct 2013 22:11:17 +0100 Subject: [PATCH 028/147] add new module: util.log --- src/you_get/common.py | 9 +-- src/you_get/util/fs.py | 0 src/you_get/util/log.py | 121 ++++++++++++++++++++++++++++++++++++++++ src/you_get/version.py | 1 + 4 files changed, 127 insertions(+), 4 deletions(-) create mode 100644 src/you_get/util/fs.py create mode 100644 src/you_get/util/log.py diff --git a/src/you_get/common.py b/src/you_get/common.py index ceb07646..4ad45053 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -10,6 +10,7 @@ from urllib import request, parse import platform from .version import __version__ +from .util import log dry_run = False force = False @@ -772,8 +773,8 @@ def script_main(script_name, download, download_playlist = None): try: opts, args = getopt.getopt(sys.argv[1:], short_opts, opts) except getopt.GetoptError as err: - print(err) - print(help) + log.e(err) + log.e("try 'you-get --help' for more options") sys.exit(2) info_only = False @@ -811,8 +812,8 @@ def script_main(script_name, download, download_playlist = None): elif o in ('-x', '--http-proxy'): proxy = a else: - print(help) - sys.exit(1) + log.e("try 'you-get --help' for more options") + sys.exit(2) if not args: print(help) sys.exit() diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py new file mode 100644 index 00000000..e69de29b diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py new file mode 100644 index 00000000..738aa209 --- /dev/null +++ b/src/you_get/util/log.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +from ..version import __name__ + +import os, sys + +# Is terminal ANSI/VT100 compatible +if os.getenv('TERM') in ( + 'xterm', + 'vt100', + 'linux', + 'eterm-color', + ): + has_colors = True +else: + try: + ppid = os.getppid() + has_colors = (os.popen('ps -p %d -ocomm=' % ppid).read().strip() + == 'emacs') + except: + has_colors = False + +# ANSI/VT100 escape code +# http://en.wikipedia.org/wiki/ANSI_escape_code +colors = { + 'none': '', + 'reset': '\033[0m', + + 'black': '\033[30m', + 'bold-black': '\033[30;1m', + 'dark-gray': '\033[90m', + 'bold-dark-gray': '\033[90;1m', + + 'red': '\033[31m', + 'bold-red': '\033[31;1m', + 'light-red': '\033[91m', + 'bold-light-red': '\033[91;1m', + + 'green': '\033[32m', + 'bold-green': '\033[32;1m', + 'light-green': '\033[92m', + 'bold-light-green': '\033[92;1m', + + 'yellow': '\033[33m', + 'bold-yellow': '\033[33;1m', + 'light-yellow': '\033[93m', + 'bold-light-yellow': '\033[93;1m', + + 'blue': '\033[34m', + 'bold-blue': '\033[34;1m', + 'light-blue': '\033[94m', + 'bold-light-blue': '\033[94;1m', + + 'magenta': '\033[35m', + 'bold-magenta': '\033[35;1m', + 'light-magenta': '\033[95m', + 'bold-light-magenta': '\033[95;1m', + + 'cyan': '\033[36m', + 'bold-cyan': '\033[36;1m', + 'light-cyan': '\033[96m', + 'bold-light-cyan': '\033[96;1m', + + 'light-gray': '\033[37m', + 'bold-light-gray': '\033[37;1m', + 'white': '\033[97m', + 'bold-white': '\033[97;1m', +} + +def println(message): + """Prints a log message. + """ + sys.stderr.write("{0}: {1}\n".format(__name__, message)) + +def writeln(color, message): + """Prints a colorful log message. + """ + if color in colors: + sys.stderr.write("{0}{1}: {2}{3}\n".format(colors[color], __name__, message, colors['reset'])) + else: + sys.stderr.write("{0}: {1}\n".format(__name__, message)) + +def i(message): + """Sends an info log message. + """ + if has_colors: + writeln('white', message) + else: + println(message) + +def d(message): + """Sends a debug log message. + """ + if has_colors: + writeln('blue', message) + else: + println(message) + +def w(message): + """Sends a warning log message. + """ + if has_colors: + writeln('yellow', message) + else: + println(message) + +def e(message): + """Sends an error log message. + """ + if has_colors: + writeln('light-red', message) + else: + println(message) + +def wtf(message): + """What a Terrible Failure. + """ + if has_colors: + writeln('bold-red', message) + else: + println(message) diff --git a/src/you_get/version.py b/src/you_get/version.py index 2295fbec..7c09b706 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,5 +1,6 @@ #!/usr/bin/env python __all__ = ['__version__', '__date__'] +__name__ = 'you-get' __version__ = '0.3.23' __date__ = '2013-10-23' From a1172526a2998beaed43583c70725cfcad217e2b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 29 Oct 2013 22:19:28 +0100 Subject: [PATCH 029/147] add __init__.py --- src/you_get/util/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/you_get/util/__init__.py diff --git a/src/you_get/util/__init__.py b/src/you_get/util/__init__.py new file mode 100644 index 00000000..e69de29b From ac30ee318a3974a2e787c1b14b16e9e2546152a4 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 29 Oct 2013 23:49:33 +0100 Subject: [PATCH 030/147] update module: util.log --- src/you_get/util/log.py | 67 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 738aa209..6f02c3a1 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -14,6 +14,7 @@ if os.getenv('TERM') in ( has_colors = True else: try: + # Eshell ppid = os.getppid() has_colors = (os.popen('ps -p %d -ocomm=' % ppid).read().strip() == 'emacs') @@ -67,55 +68,53 @@ colors = { 'bold-white': '\033[97;1m', } -def println(message): - """Prints a log message. - """ - sys.stderr.write("{0}: {1}\n".format(__name__, message)) - -def writeln(color, message): - """Prints a colorful log message. +def println(text, color=None, ostream=sys.stdout): + """Prints a text line to stream. """ if color in colors: - sys.stderr.write("{0}{1}: {2}{3}\n".format(colors[color], __name__, message, colors['reset'])) + ostream.write("{0}{1}{2}\n".format(colors[color], text, colors['reset'])) else: - sys.stderr.write("{0}: {1}\n".format(__name__, message)) + ostream.write("{0}\n".format(text)) -def i(message): +def printlog(message, color=None, ostream=sys.stderr): + """Prints a log message to stream. + """ + if color in colors: + ostream.write("{0}{1}: {2}{3}\n".format(colors[color], __name__, message, colors['reset'])) + else: + ostream.write("{0}: {1}\n".format(__name__, message)) + +def i(message, ostream=sys.stderr): """Sends an info log message. """ - if has_colors: - writeln('white', message) - else: - println(message) + printlog(message, + 'white' if has_colors else None, + ostream=ostream) -def d(message): +def d(message, ostream=sys.stderr): """Sends a debug log message. """ - if has_colors: - writeln('blue', message) - else: - println(message) + printlog(message, + 'blue' if has_colors else None, + ostream=ostream) -def w(message): +def w(message, ostream=sys.stderr): """Sends a warning log message. """ - if has_colors: - writeln('yellow', message) - else: - println(message) + printlog(message, + 'yellow' if has_colors else None, + ostream=ostream) -def e(message): +def e(message, ostream=sys.stderr): """Sends an error log message. """ - if has_colors: - writeln('light-red', message) - else: - println(message) + printlog(message, + 'bold-yellow' if has_colors else None, + ostream=ostream) -def wtf(message): +def wtf(message, ostream=sys.stderr): """What a Terrible Failure. """ - if has_colors: - writeln('bold-red', message) - else: - println(message) + printlog(message, + 'bold-red' if has_colors else None, + ostream=ostream) From 5c27e57d8c1c2211f06a4ea1ca2a4aa3b8b10032 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 29 Oct 2013 23:53:21 +0100 Subject: [PATCH 031/147] update __init__.py --- src/you_get/util/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/util/__init__.py b/src/you_get/util/__init__.py index e69de29b..b097d246 100644 --- a/src/you_get/util/__init__.py +++ b/src/you_get/util/__init__.py @@ -0,0 +1,3 @@ + +from .fs import * +from .log import * From c3f5f6a320e9a922717238128a6a58bb509327c7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 00:19:08 +0100 Subject: [PATCH 032/147] add new module: util.fs --- src/you_get/common.py | 9 +++++---- src/you_get/util/fs.py | 45 ++++++++++++++++++++++++++++++++++++++++++ tests/test_util.py | 11 +++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 tests/test_util.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 4ad45053..2c99976f 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -10,7 +10,7 @@ from urllib import request, parse import platform from .version import __version__ -from .util import log +from .util import log, legitimize dry_run = False force = False @@ -94,7 +94,7 @@ def parse_query_param(url, param): def unicodize(text): return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text) -# DEPRECATED in favor of filenameable() +# DEPRECATED in favor of util.legitimize() def escape_file_path(path): path = path.replace('/', '-') path = path.replace('\\', '-') @@ -102,6 +102,7 @@ def escape_file_path(path): path = path.replace('?', '-') return path +# DEPRECATED in favor of util.legitimize() def filenameable(text): """Converts a string to a legal filename through various OSes. """ @@ -509,7 +510,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, traceback.print_exc(file = sys.stdout) pass - title = filenameable(title) + title = legitimize(title) filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) @@ -585,7 +586,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer assert ext in ('ts') - title = filenameable(title) + title = legitimize(title) filename = '%s.%s' % (title, 'ts') filepath = os.path.join(output_dir, filename) diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index e69de29b..09aa48a9 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +import platform + +def legitimize(text, os=platform.system()): + """Converts a string to a valid filename. + """ + + # POSIX systems + text = text.translate({ + 0: None, + ord('/'): '-', + }) + + if os == 'Windows': + # Windows (non-POSIX namespace) + text = text[:255] # Trim to 255 Unicode characters long + text = text.translate({ + # Reserved in Windows VFAT and NTFS + ord(':'): '-', + ord('*'): '-', + ord('?'): '-', + ord('\\'): '-', + ord('|'): '-', + ord('\"'): '\'', + # Reserved in Windows VFAT + ord('+'): '-', + ord('<'): '-', + ord('>'): '-', + ord('['): '(', + ord(']'): ')', + }) + else: + # *nix + if os == 'Darwin': + # Mac OS HFS+ + text = text.translate({ + ord(':'): '-', + }) + + # Remove leading . + if text.startswith("."): + text = text[1:] + + return text diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 00000000..0b7b0231 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +import unittest + +from you_get.util import * + +class TestUtil(unittest.TestCase): + def test_legitimize(self): + self.assertEqual(legitimize("1*2", os="Linux"), "1*2") + self.assertEqual(legitimize("1*2", os="Darwin"), "1*2") + self.assertEqual(legitimize("1*2", os="Windows"), "1-2") From 26b2be3f7c1c57cc01b74c39800e86059bcd4a7f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 03:37:47 +0100 Subject: [PATCH 033/147] change log.i() to no color --- src/you_get/util/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 6f02c3a1..bb3c6b79 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -88,7 +88,7 @@ def i(message, ostream=sys.stderr): """Sends an info log message. """ printlog(message, - 'white' if has_colors else None, + None, ostream=ostream) def d(message, ostream=sys.stderr): From 1027b925383ef46beb60369a1b561ccf981fc1be Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 04:28:09 +0100 Subject: [PATCH 034/147] add log.underlined() --- src/you_get/util/log.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index bb3c6b79..299152d0 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -68,10 +68,15 @@ colors = { 'bold-white': '\033[97;1m', } +def underlined(text): + """Returns an underlined text. + """ + return "\33[4m%s\33[24m" % text if has_colors else text + def println(text, color=None, ostream=sys.stdout): """Prints a text line to stream. """ - if color in colors: + if has_colors and color in colors: ostream.write("{0}{1}{2}\n".format(colors[color], text, colors['reset'])) else: ostream.write("{0}\n".format(text)) @@ -79,7 +84,7 @@ def println(text, color=None, ostream=sys.stdout): def printlog(message, color=None, ostream=sys.stderr): """Prints a log message to stream. """ - if color in colors: + if has_colors and color in colors: ostream.write("{0}{1}: {2}{3}\n".format(colors[color], __name__, message, colors['reset'])) else: ostream.write("{0}: {1}\n".format(__name__, message)) From 8919897ae884f8a26698da3cf3619d20a242f170 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:29:44 +0100 Subject: [PATCH 035/147] add new module: util.sogou_proxy --- src/you_get/common.py | 78 +++++++++++++++--- src/you_get/util/__init__.py | 2 + src/you_get/util/sogou_proxy.py | 141 ++++++++++++++++++++++++++++++++ 3 files changed, 209 insertions(+), 12 deletions(-) create mode 100644 src/you_get/util/sogou_proxy.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 2c99976f..2233d60d 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -8,12 +8,15 @@ import re import sys from urllib import request, parse import platform +import threading from .version import __version__ -from .util import log, legitimize +from .util import log, legitimize, sogou_proxy_server dry_run = False force = False +sogou_proxy = None +sogou_env = None fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -715,6 +718,35 @@ def print_info(site_info, title, type, size): print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)") print() +def parse_host(host): + """Parses host name and port number from a string. + """ + if re.match(r'^(\d+)$', host) is not None: + return ("0.0.0.0", int(host)) + if re.match(r'^(\w+)://', host) is None: + host = "//" + host + o = parse.urlparse(host) + hostname = o.hostname or "0.0.0.0" + port = o.port or 0 + return (hostname, port) + +def get_sogou_proxy(): + return sogou_proxy + +def set_proxy(proxy): + proxy_handler = request.ProxyHandler({ + 'http': '%s:%s' % proxy, + 'https': '%s:%s' % proxy, + }) + opener = request.build_opener(proxy_handler) + request.install_opener(opener) + +def unset_proxy(): + proxy_handler = request.ProxyHandler({}) + opener = request.build_opener(proxy_handler) + request.install_opener(opener) + +# DEPRECATED in favor of set_proxy() and unset_proxy() def set_http_proxy(proxy): if proxy == None: # Use system default setting proxy_support = request.ProxyHandler() @@ -766,7 +798,7 @@ def script_main(script_name, download, download_playlist = None): ''' short_opts = 'Vhfiuno:x:' - opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'output-dir=', 'http-proxy='] + opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -778,6 +810,11 @@ def script_main(script_name, download, download_playlist = None): log.e("try 'you-get --help' for more options") sys.exit(2) + global force + global dry_run + global sogou_proxy + global sogou_env + info_only = False playlist = False merge = True @@ -793,12 +830,10 @@ def script_main(script_name, download, download_playlist = None): print(help) sys.exit() elif o in ('-f', '--force'): - global force force = True elif o in ('-i', '--info'): info_only = True elif o in ('-u', '--url'): - global dry_run dry_run = True elif o in ('-l', '--playlist'): playlist = True @@ -812,19 +847,38 @@ def script_main(script_name, download, download_playlist = None): output_dir = a elif o in ('-x', '--http-proxy'): proxy = a + elif o in ('--sogou'): + sogou_proxy = ("0.0.0.0", 0) + elif o in ('--sogou-proxy'): + sogou_proxy = parse_host(a) + elif o in ('--sogou-env'): + sogou_env = a else: log.e("try 'you-get --help' for more options") sys.exit(2) if not args: - print(help) - sys.exit() + if sogou_proxy is not None: + try: + if sogou_env is not None: + server = sogou_proxy_server(sogou_proxy, network_env=sogou_env) + else: + server = sogou_proxy_server(sogou_proxy) + server.serve_forever() + except KeyboardInterrupt: + if traceback: + raise + else: + sys.exit() + else: + print(help) + sys.exit() set_http_proxy(proxy) - - if traceback: + + try: download_main(download, download_playlist, args, playlist, output_dir, merge, info_only) - else: - try: - download_main(download, download_playlist, args, playlist, output_dir, merge, info_only) - except KeyboardInterrupt: + except KeyboardInterrupt: + if traceback: + raise + else: sys.exit(1) diff --git a/src/you_get/util/__init__.py b/src/you_get/util/__init__.py index b097d246..4c43c5fa 100644 --- a/src/you_get/util/__init__.py +++ b/src/you_get/util/__init__.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python from .fs import * from .log import * +from .sogou_proxy import * diff --git a/src/you_get/util/sogou_proxy.py b/src/you_get/util/sogou_proxy.py new file mode 100644 index 00000000..ffdc0b7a --- /dev/null +++ b/src/you_get/util/sogou_proxy.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +# Original code from: +# http://xiaoxia.org/2011/03/26/using-python-to-write-a-local-sogou-proxy-server-procedures/ + +from . import log + +from http.client import HTTPResponse +from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn +from threading import Thread +import random, socket, struct, sys, time + +def sogou_proxy_server( + host=("0.0.0.0", 0), + network_env='CERNET', + ostream=sys.stderr): + """ + """ + + x_sogou_auth = '9CD285F1E7ADB0BD403C22AD1D545F40/30/853edc6d49ba4e27' + proxy_host = 'h0.cnc.bj.ie.sogou.com' + proxy_port = 80 + + def sogou_hash(t, host): + s = (t + host + 'SogouExplorerProxy').encode('ascii') + code = len(s) + dwords = int(len(s) / 4) + rest = len(s) % 4 + v = struct.unpack(str(dwords) + 'i' + str(rest) + 's', s) + for vv in v: + if type(vv) != bytes: + a = (vv & 0xFFFF) + b = (vv >> 16) + code += a + code = code ^ (((code << 5) ^ b) << 0xb) + # To avoid overflows + code &= 0xffffffff + code += code >> 0xb + if rest == 3: + code += s[len(s) - 2] * 256 + s[len(s) - 3] + code = code ^ ((code ^ (s[len(s) - 1]) * 4) << 0x10) + code &= 0xffffffff + code += code >> 0xb + elif rest == 2: + code += (s[len(s) - 1]) * 256 + (s[len(s) - 2]) + code ^= code << 0xb + code &= 0xffffffff + code += code >> 0x11 + elif rest == 1: + code += s[len(s) - 1] + code ^= code << 0xa + code &= 0xffffffff + code += code >> 0x1 + code ^= code * 8 + code &= 0xffffffff + code += code >> 5 + code ^= code << 4 + code = code & 0xffffffff + code += code >> 0x11 + code ^= code << 0x19 + code = code & 0xffffffff + code += code >> 6 + code = code & 0xffffffff + return hex(code)[2:].rstrip('L').zfill(8) + + class Handler(BaseHTTPRequestHandler): + _socket = None + def do_proxy(self): + try: + if self._socket is None: + self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._socket.connect((proxy_host, proxy_port)) + self._socket.send(self.requestline.encode('ascii') + b'\r\n') + log.d(self.requestline, ostream) + + # Add Sogou Verification Tags + self.headers['X-Sogou-Auth'] = x_sogou_auth + t = hex(int(time.time()))[2:].rstrip('L').zfill(8) + self.headers['X-Sogou-Tag'] = sogou_hash(t, self.headers['Host']) + self.headers['X-Sogou-Timestamp'] = t + self._socket.send(str(self.headers).encode('ascii') + b'\r\n') + + # Send POST data + if self.command == 'POST': + self._socket.send(self.rfile.read(int(self.headers['Content-Length']))) + response = HTTPResponse(self._socket, method=self.command) + response.begin() + + # Response + status = 'HTTP/1.1 %s %s' % (response.status, response.reason) + self.wfile.write(status.encode('ascii') + b'\r\n') + h = '' + for hh, vv in response.getheaders(): + if hh.upper() != 'TRANSFER-ENCODING': + h += hh + ': ' + vv + '\r\n' + self.wfile.write(h.encode('ascii') + b'\r\n') + while True: + response_data = response.read(8192) + if len(response_data) == 0: + break + self.wfile.write(response_data) + + except socket.error: + log.e('Socket error for ' + self.requestline, ostream) + + def do_POST(self): + self.do_proxy() + + def do_GET(self): + self.do_proxy() + + class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): + pass + + # Server starts + log.printlog('Sogou Proxy Mini-Server', color='bold-green', ostream=ostream) + + try: + server = ThreadingHTTPServer(host, Handler) + except Exception as ex: + log.wtf("Socket error: %s" % ex, ostream) + exit(1) + host = server.server_address + + if network_env.upper() == 'CERNET': + proxy_host = 'h%s.edu.bj.ie.sogou.com' % random.randint(0, 10) + elif network_env.upper() == 'CTCNET': + proxy_host = 'h%s.ctc.bj.ie.sogou.com' % random.randint(0, 3) + elif network_env.upper() == 'CNCNET': + proxy_host = 'h%s.cnc.bj.ie.sogou.com' % random.randint(0, 3) + elif network_env.upper() == 'DXT': + proxy_host = 'h%s.dxt.bj.ie.sogou.com' % random.randint(0, 10) + else: + proxy_host = 'h%s.edu.bj.ie.sogou.com' % random.randint(0, 10) + + log.i('Remote host: %s' % log.underlined(proxy_host), ostream) + log.i('Proxy server running on %s' % + log.underlined("%s:%s" % host), ostream) + + return server From 8e4bc2f9fa239b735c842d9e5dee23bffa783ff9 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:32:06 +0100 Subject: [PATCH 036/147] enable Sogou proxy for Sohu --- src/you_get/extractor/sohu.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/you_get/extractor/sohu.py b/src/you_get/extractor/sohu.py index c364917f..a084f116 100644 --- a/src/you_get/extractor/sohu.py +++ b/src/you_get/extractor/sohu.py @@ -17,6 +17,14 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): if not vid: vid = r1('vid\s*:\s*"(\d+)"', html) + # Open Sogou proxy if required + if get_sogou_proxy() is not None: + server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w')) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + set_proxy(server.server_address) + if vid: data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: @@ -52,6 +60,11 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): urls.append(real_url(host, prot, file, new)) assert data['clipsURL'][0].endswith('.mp4') + # Close Sogou proxy if required + if get_sogou_proxy() is not None: + server.shutdown() + unset_proxy() + print_info(site_info, title, 'mp4', size) if not info_only: download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge) From ee5dd8f2dd6f224e5e8434c67e6ec903643f2774 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:32:29 +0100 Subject: [PATCH 037/147] enable Sogou proxy for Youku --- src/you_get/extractor/youku.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/you_get/extractor/youku.py b/src/you_get/extractor/youku.py index 20c79c4d..4abedc97 100644 --- a/src/you_get/extractor/youku.py +++ b/src/you_get/extractor/youku.py @@ -121,7 +121,21 @@ def file_type_of_url(url): return str(re.search(r'/st/([^/]+)/', url).group(1)) def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False): + # Open Sogou proxy if required + if get_sogou_proxy() is not None: + server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w')) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + set_proxy(server.server_address) + info = get_info(id) + + # Close Sogou proxy if required + if get_sogou_proxy() is not None: + server.shutdown() + unset_proxy() + urls, sizes = zip(*find_video(info, stream_type)) ext = file_type_of_url(urls[0]) total_size = sum(sizes) From a498ebe2bfa3788d22ffb7547e50b05f4dd89b56 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:36:41 +0100 Subject: [PATCH 038/147] TERM=screen (Tmux) compatible with ANSI/VT100 --- src/you_get/util/log.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 299152d0..c28fd4e7 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -10,6 +10,7 @@ if os.getenv('TERM') in ( 'vt100', 'linux', 'eterm-color', + 'screen', ): has_colors = True else: From 86c29abf6ee11996565bd4de710d56c254722a40 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:41:29 +0100 Subject: [PATCH 039/147] correct binary prefix --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 2233d60d..b96c8992 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -715,7 +715,7 @@ def print_info(site_info, title, type, size): print("Video Site:", site_info) print("Title: ", tr(title)) print("Type: ", type_info) - print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)") + print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)") print() def parse_host(host): From 4f7dd97e0246c5af76e6986457ce1b492c1527f6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:46:22 +0100 Subject: [PATCH 040/147] docstring for sogou_proxy_server() --- src/you_get/util/sogou_proxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/util/sogou_proxy.py b/src/you_get/util/sogou_proxy.py index ffdc0b7a..01ffb572 100644 --- a/src/you_get/util/sogou_proxy.py +++ b/src/you_get/util/sogou_proxy.py @@ -15,7 +15,7 @@ def sogou_proxy_server( host=("0.0.0.0", 0), network_env='CERNET', ostream=sys.stderr): - """ + """Returns a Sogou proxy server object. """ x_sogou_auth = '9CD285F1E7ADB0BD403C22AD1D545F40/30/853edc6d49ba4e27' From 53aec49f852adc70e5d779e5fa517720ff149dba Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:54:30 +0100 Subject: [PATCH 041/147] update help message --- src/you_get/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index b96c8992..25df1090 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -792,12 +792,14 @@ def script_main(script_name, download, download_playlist = None): -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. -o | --output-dir Set the output directory for downloaded videos. - -x | --http-proxy Use specific HTTP proxy for downloading. + -x | --http-proxy Use specific HTTP proxy for downloading. --no-proxy Don't use any proxy. (ignore $http_proxy) + -S | --sogou Use a Sogou proxy server for downloading. + --sogou-proxy Run a standalone Sogou proxy server. --debug Show traceback on KeyboardInterrupt. ''' - short_opts = 'Vhfiuno:x:' + short_opts = 'VhfiunSo:x:' opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] if download_playlist: short_opts = 'l' + short_opts @@ -847,7 +849,7 @@ def script_main(script_name, download, download_playlist = None): output_dir = a elif o in ('-x', '--http-proxy'): proxy = a - elif o in ('--sogou'): + elif o in ('-S', '--sogou'): sogou_proxy = ("0.0.0.0", 0) elif o in ('--sogou-proxy'): sogou_proxy = parse_host(a) From 117e03cdc89edf4fd03740ad399ee39779a7708e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 08:03:16 +0100 Subject: [PATCH 042/147] update README.md: move zh-CN document to wiki --- README.md | 227 +----------------------------------------------------- 1 file changed, 2 insertions(+), 225 deletions(-) diff --git a/README.md b/README.md index e8b0b775..1f70fb4a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ See the project homepage for further documentation. +中文说明:请参见[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%AD%E6%96%87%E8%AF%B4%E6%98%8E)。 + Fork me on GitHub: ## Features @@ -206,228 +208,3 @@ You-Get is licensed under the [MIT license](https://raw.github.com/soimort/you-g ## Contributing Please see [CONTRIBUTING.md](https://github.com/soimort/you-get/blob/master/CONTRIBUTING.md). - - - -*** - - - -# You-Get - 中文说明 - -[You-Get](https://github.com/soimort/you-get)是一个基于Python 3的视频下载工具。之所以写它的主要原因是,我找不到一个现成的下载工具能够同时支持[YouTube](http://www.youtube.com/)和[优酷](http://www.youku.com/);而且,几乎所有以前的视频下载程序都是基于Python 2的。 - -项目主页: - -GitHub地址: - -## 特点 - -### 说明 - -You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/youku-lixian)用Python 3改写而成,增加了以下功能: - -* 支持YouTube、Vimeo等国外视频网站 -* 支持断点续传 -* 可设置HTTP代理 - -### 支持的站点(截至目前) - -已实现对以下站点的支持,以后会陆续增加(・∀・) - -* YouTube -* Vimeo -* Coursera -* Blip -* Dailymotion -* eHow -* Facebook -* Google+ -* Google Drive -* Khan Academy -* TED -* Tumblr -* Vine -* Instagram -* SoundCloud -* Mixcloud -* Freesound -* JPopsuki -* VID48 -* NICONICO动画 -* 优酷 -* 土豆 -* 音悦台 -* AcFun -* bilibili -* CNTV -* 豆瓣 -* 凤凰视频 -* 爱奇艺 -* 激动网 -* 酷6网 -* MioMio -* 网易视频 -* PPTV -* 腾讯视频 -* 新浪视频 -* 搜狐视频 -* 56网 -* 虾米 -* 5sing -* 百度音乐 -* 百度网盘 -* SongTaste -* Alive.in.th - -## 依赖 - -* [Python 3](http://www.python.org/download/releases/) -* __(可选)__ [FFmpeg](http://ffmpeg.org) - * 用于转换与合并视频文件。 - -## 安装说明 - -(以下命令格式均以Linux shell为例) - -### 1. 通过[Pip](http://www.pip-installer.org/)安装: - - $ pip install you-get - - 检查安装是否成功: - - $ you-get -V - -### 2. 通过[EasyInstall](http://pypi.python.org/pypi/setuptools)安装: - - $ easy_install you-get - - 检查安装是否成功: - - $ you-get -V - -### 3. 从Git安装: - - $ git clone git://github.com/soimort/you-get.git - - 在不安装的情况下直接使用脚本: - - $ cd you-get/ - $ ./you-get -V - - 若要将Python package安装到系统默认路径,执行: - - $ make install - - 检查安装是否成功: - - $ you-get -V - -### 4. 直接下载(从): - - $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master - $ unzip you-get.zip - - 在不安装的情况下直接使用脚本: - - $ cd soimort-you-get-*/ - $ ./you-get -V - - 若要将Python package安装到系统默认路径,执行: - - $ make install - - 检查安装是否成功: - - $ you-get -V - -### 5. 从[AUR (Arch User Repository)](http://aur.archlinux.org/)安装: - - 点击[这里](https://aur.archlinux.org/packages.php\?ID=62576)。 - -### 升级: - -使用Pip: - - $ pip install --upgrade you-get - -### FAQ(针对Windows用户): - -* Q:我不知道该如何在Windows下安装。 - -* A:不需要安装。直接把`you-get`目录放到系统`%PATH%`中。 - -* Q:出现错误提示`UnicodeDecodeError: 'gbk' codec can't decode byte 0xb0 in position 1012: illegal multibyte sequence`。 - -* A:执行`set PYTHONIOENCODING=utf-8`。 - -## 使用方法示例 - -### 如何下载视频 - -显示视频信息,但不进行下载(`-i`或`--info`选项): - - $ you-get -i http://www.yinyuetai.com/video/463772 - -下载视频: - - $ you-get http://www.yinyuetai.com/video/463772 - -下载多个视频: - - $ you-get http://www.yinyuetai.com/video/463772 http://www.yinyuetai.com/video/471500 - -若当前目录下已有与视频标题同名的文件,下载时会自动跳过。若有同名的`.download`临时文件,程序会从上次中断处开始下载。 -如要强制重新下载该视频,可使用`-f`(`--force`)选项: - - $ you-get -f http://www.yinyuetai.com/video/463772 - -`-l`(`--playlist`)选项用于下载播放列表(只对某些网站适用): - - $ you-get -l http://www.youku.com/playlist_show/id_5344313.html - -__注:从0.1.3以后的版本起,`-l`选项不再必须。You-Get可以自动识别并处理播放列表的下载。__ - -指定视频文件的下载目录: - - $ you-get -o ~/Downloads http://www.yinyuetai.com/video/463772 - -显示详细帮助: - - $ you-get -h - -### 如何设置代理 - -默认情况下,Python自动使用系统的代理配置。可以通过环境变量`http_proxy`来设置系统的HTTP代理。 - -`-x`(`--http-proxy`)选项用于手动指定You-Get所使用的HTTP代理。例如:GoAgent的代理服务器是`http://127.0.0.1:8087`,则通过该代理下载某YouTube视频的命令是: - - $ you-get -x 127.0.0.1:8087 http://www.youtube.com/watch?v=KbtO_Ayjw0M - -Windows下的自由门等翻墙软件会自动设置系统全局代理,因此无需指定HTTP代理即可下载YouTube视频: - - $ you-get http://www.youtube.com/watch?v=KbtO_Ayjw0M - -如果不希望程序在下载过程中使用任何代理(包括系统的代理配置),可以显式地指定`--no-proxy`选项: - - $ you-get --no-proxy http://v.youku.com/v_show/id_XMjI0ODc1NTc2.html - -### 断点续传 - -下载未完成时被中止(因为`Ctrl+C`终止程序或者网络中断等原因),在目标路径中会有一个扩展名为`.download`的临时文件。 - -下次运行只要在目标路径中找到相应的`.download`临时文件,程序会自动从中断处继续下载。(除非指定了`-f`选项) - -## 使用Python 2? - -优酷等国内视频网站的下载,请移步:[iambus/youku-lixian](https://github.com/iambus/youku-lixian) - -YouTube等国外视频网站的下载,请移步:[rg3/youtube-dl](https://github.com/rg3/youtube-dl) - -## 许可证 - -You-Get在[MIT License](https://raw.github.com/soimort/you-get/master/LICENSE.txt)下发布。 - -## 如何参与贡献 / 报告issue - -请阅读 [CONTRIBUTING.md](https://github.com/soimort/you-get/blob/master/CONTRIBUTING.md)。 From 127611a66bb61d4a472f055a3d48b18fc41f9bc3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 08:06:33 +0100 Subject: [PATCH 043/147] update README.md: move zh-CN document to wiki --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1f70fb4a..a41ea451 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ See the project homepage for further documentation. -中文说明:请参见[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%AD%E6%96%87%E8%AF%B4%E6%98%8E)。 - Fork me on GitHub: +__中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%AD%E6%96%87%E8%AF%B4%E6%98%8E)。 + ## Features ### Supported Sites (As of Now) From addb3b1c33c12d5177c8412d28fe116fa6d9d56a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 08:09:15 +0100 Subject: [PATCH 044/147] update README.md --- README.md | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index a41ea451..60071617 100644 --- a/README.md +++ b/README.md @@ -175,31 +175,23 @@ By default, Python will apply the system proxy settings (i.e. environment variab For a complete list of all available options, see: $ you-get --help - -## Examples (For Developers) - -In Python 3 (interactive): - - >>> from you_get.downloader import * - >>> youtube.download("http://www.youtube.com/watch?v=8bQlxQJEzLk", info_only = True) - Video Site: YouTube.com - Title: If you're good at something, never do it for free! - Type: WebM video (video/webm) - Size: 0.13 MB (133176 Bytes) + Usage: you-get [OPTION]... [URL]... - >>> import you_get - >>> you_get.any_download("http://www.youtube.com/watch?v=sGwy8DsUJ4M") - Video Site: YouTube.com - Title: Mort from Madagascar LIKES - Type: WebM video (video/webm) - Size: 1.78 MB (1867072 Bytes) + Startup options: + -V | --version Display the version and exit. + -h | --help Print this help and exit. - Downloading Mort from Madagascar LIKES.webm ... - 100.0% ( 1.8/1.8 MB) [========================================] 1/1 - -## API Reference - -See source code. + Download options (use with URLs): + -f | --force Force overwriting existed files. + -i | --info Display the information of videos without downloading. + -u | --url Display the real URLs of videos without downloading. + -n | --no-merge Don't merge video parts. + -o | --output-dir Set the output directory for downloaded videos. + -x | --http-proxy Use specific HTTP proxy for downloading. + --no-proxy Don't use any proxy. (ignore $http_proxy) + -S | --sogou Use a Sogou proxy server for downloading. + --sogou-proxy Run a standalone Sogou proxy server. + --debug Show traceback on KeyboardInterrupt. ## License From 740cef54d428e4145a5e64dd7339239ae617d59f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 08:09:27 +0100 Subject: [PATCH 045/147] update README.txt --- README.txt | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/README.txt b/README.txt index c4b68af8..83c84ba0 100644 --- a/README.txt +++ b/README.txt @@ -166,33 +166,23 @@ Command-Line Options For a complete list of all available options, see:: $ you-get --help - -Examples (For Developers) -------------------------- - -In Python 3 (interactive):: - - >>> from you_get.downloader import * - >>> youtube.download("http://www.youtube.com/watch?v=8bQlxQJEzLk", info_only = True) - Video Site: YouTube.com - Title: If you're good at something, never do it for free! - Type: WebM video (video/webm) - Size: 0.13 MB (133176 Bytes) + Usage: you-get [OPTION]... [URL]... - >>> import you_get - >>> you_get.any_download("http://www.youtube.com/watch?v=sGwy8DsUJ4M") - Video Site: YouTube.com - Title: Mort from Madagascar LIKES - Type: WebM video (video/webm) - Size: 1.78 MB (1867072 Bytes) + Startup options: + -V | --version Display the version and exit. + -h | --help Print this help and exit. - Downloading Mort from Madagascar LIKES.webm ... - 100.0% ( 1.8/1.8 MB) [========================================] 1/1 - -API Reference -------------- - -See source code. + Download options (use with URLs): + -f | --force Force overwriting existed files. + -i | --info Display the information of videos without downloading. + -u | --url Display the real URLs of videos without downloading. + -n | --no-merge Don't merge video parts. + -o | --output-dir Set the output directory for downloaded videos. + -x | --http-proxy Use specific HTTP proxy for downloading. + --no-proxy Don't use any proxy. (ignore $http_proxy) + -S | --sogou Use a Sogou proxy server for downloading. + --sogou-proxy Run a standalone Sogou proxy server. + --debug Show traceback on KeyboardInterrupt. License ------- From 06ba0b58da67f6cd1f089cbd917c250133fc8dcf Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 08:42:34 +0100 Subject: [PATCH 046/147] update README.md --- README.md | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 60071617..d8a89bae 100644 --- a/README.md +++ b/README.md @@ -67,23 +67,15 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A ## Installation -### 1. Install via [Pip](http://www.pip-installer.org/): +### 1. Install via Pip: - $ pip install you-get + $ [sudo] pip install you-get Check if the installation was successful: $ you-get -V -### 2. Install via [EasyInstall](http://pypi.python.org/pypi/setuptools): - - $ easy_install you-get - - Check if the installation was successful: - - $ you-get -V - -### 3. Install from Git: +### 2. Install from Git: $ git clone git://github.com/soimort/you-get.git @@ -100,7 +92,7 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A $ you-get -V -### 4. Direct download (from ): +### 3. Direct download (from ): $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master $ unzip you-get.zip @@ -118,27 +110,19 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A $ you-get -V -### 5. Install from [AUR (Arch User Repository)](http://aur.archlinux.org/): +### 4. Install from your distro's repo: - Click [here](https://aur.archlinux.org/packages.php\?ID=62576). +* __AUR (Arch)__: -### Upgrading: +* __Overlay (Gentoo)__: + +## Upgrading Using Pip: - $ pip install --upgrade you-get + $ [sudo] pip install --upgrade you-get -### FAQ (For Windows Users): - -* Q: I don't know how to install it on Windows. - -* A: Then don't do it. Just put your `you-get` folder into system `%PATH%`. - -* Q: I got something like `UnicodeDecodeError: 'gbk' codec can't decode byte 0xb0 in position 1012: illegal multibyte sequence`. - -* A: Run `set PYTHONIOENCODING=utf-8`. - -## Examples (For End-Users) +## Examples Display the information of the video without downloading: From 884ac10b17d60cb3dc2a6d6b0d08bf81d9c2a08e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 08:42:39 +0100 Subject: [PATCH 047/147] update README.txt --- README.txt | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/README.txt b/README.txt index 83c84ba0..02a9408e 100644 --- a/README.txt +++ b/README.txt @@ -72,17 +72,9 @@ Dependencies Installation ------------ -#) Install via `Pip `_:: +#) Install via Pip:: - $ pip install you-get - - Check if the installation was successful:: - - $ you-get -V - -#) Install via `EasyInstall `_:: - - $ easy_install you-get + $ [sudo] pip install you-get Check if the installation was successful:: @@ -123,12 +115,21 @@ Installation $ you-get -V -#) Install from `AUR (Arch User Repository) `_: +#) Install from your distro's repo: - Click `here `_. +* `AUR (Arch) `_ -Examples (For End-Users) ------------------------- +* `Overlay (Gentoo) `_ + +Upgrading +--------- + +Using Pip:: + + $ [sudo] pip install --upgrade you-get + +Examples +-------- Display the information of the video without downloading:: From 35c9e3336c3acebaacd40272f6e8819469a874f8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 08:49:28 +0100 Subject: [PATCH 048/147] version 0.3.24 --- CHANGELOG.txt | 9 +++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index c4d7da6c..d0409f10 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,15 @@ Changelog ========= +0.3.24 +------ + +*Date: 2013-10-30* + +* Experimental: Sogou proxy server +* Fix issues for: + - Vimeo + 0.3.23 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 7c09b706..4e983583 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.23' -__date__ = '2013-10-23' +__version__ = '0.3.24' +__date__ = '2013-10-30' From d4e6542f17802c526e382036ab137e9ea052114c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 31 Oct 2013 23:51:27 +0100 Subject: [PATCH 049/147] Sohu: fix #264 --- src/you_get/extractor/sohu.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/you_get/extractor/sohu.py b/src/you_get/extractor/sohu.py index a084f116..9a1e109b 100644 --- a/src/you_get/extractor/sohu.py +++ b/src/you_get/extractor/sohu.py @@ -12,10 +12,12 @@ def real_url(host, prot, file, new): return '%s%s?key=%s' % (start[:-1], new, key) def sohu_download(url, output_dir = '.', merge = True, info_only = False): - html = get_html(url) - vid = r1('vid\s*=\s*"(\d+)"', html) - if not vid: - vid = r1('vid\s*:\s*"(\d+)"', html) + if re.match(r'http://share.vrs.sohu.com', url): + vid = r1('id=(\d+)', url) + else: + html = get_html(url) + vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) + assert vid # Open Sogou proxy if required if get_sogou_proxy() is not None: @@ -25,7 +27,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): server_thread.start() set_proxy(server.server_address) - if vid: + if re.match(r'http://tv.sohu.com/', url): data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: hqvid = data['data'][qtyp] @@ -44,10 +46,6 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): assert data['clipsURL'][0].endswith('.mp4') else: - if re.match(r'http://share.vrs.sohu.com', url): - vid = r1('id=(\d+)', url) - else: - vid = r1('vid\s*=\s*\'(\d+)\'', get_html(url)) data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) host = data['allot'] prot = data['prot'] From 987ee1cbf390a7b7f2bfdee9dcdb21c27c781ea4 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 31 Oct 2013 23:51:46 +0100 Subject: [PATCH 050/147] add test for: Sohu --- tests/test.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test.py b/tests/test.py index 9ecf4d68..5f7968eb 100644 --- a/tests/test.py +++ b/tests/test.py @@ -23,6 +23,15 @@ class YouGetTests(unittest.TestCase): "http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", ]) + def test_sohu(self): + test_urls([ + "http://tv.sohu.com/20120522/n343785589.shtml", + "http://tv.sohu.com/20130103/n362246415.shtml", + "http://tv.sohu.com/20130103/n362251239.shtml", + "http://my.tv.sohu.com/us/4559763/51981774.shtml", + "http://my.tv.sohu.com/us/47390785/61594248.shtml", + ]) + def test_ted(self): test_urls([ "http://www.ted.com/talks/jennifer_lin_improvs_piano_magic.html", From 33a39191a625d86a5059f4433cd66b50e6dfdf60 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 11 Nov 2013 19:03:47 +0100 Subject: [PATCH 051/147] YouTube: fix youtube_list_download_by_id --- src/you_get/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 9ff08e07..f266caa3 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -108,7 +108,7 @@ def youtube_list_download_by_id(list_id, title=None, output_dir='.', merge=True, """ video_page = get_content('http://www.youtube.com/playlist?list=%s' % list_id) - ids = set(re.findall(r' Date: Fri, 15 Nov 2013 16:18:51 +0100 Subject: [PATCH 052/147] util.log: fix #269 --- src/you_get/util/log.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index c28fd4e7..356dea76 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -2,7 +2,7 @@ from ..version import __name__ -import os, sys +import os, sys, subprocess # Is terminal ANSI/VT100 compatible if os.getenv('TERM') in ( @@ -17,7 +17,7 @@ else: try: # Eshell ppid = os.getppid() - has_colors = (os.popen('ps -p %d -ocomm=' % ppid).read().strip() + has_colors = (subprocess.getoutput('ps -p %d -ocomm=' % ppid) == 'emacs') except: has_colors = False From c99627be72e73528b62a6d76b9c66b366545ebaf Mon Sep 17 00:00:00 2001 From: fakelbst Date: Sat, 16 Nov 2013 21:42:18 +0800 Subject: [PATCH 053/147] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AF=B9=E8=B1=86?= =?UTF-8?q?=E7=93=A3=E9=9F=B3=E4=B9=90=E4=B8=93=E8=BE=91=E9=A1=B5=E9=9D=A2?= =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/downloader/douban.py | 51 +++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/src/you_get/downloader/douban.py b/src/you_get/downloader/douban.py index e27a3518..8a52275f 100644 --- a/src/you_get/downloader/douban.py +++ b/src/you_get/downloader/douban.py @@ -2,23 +2,52 @@ __all__ = ['douban_download'] +import urllib.request, urllib.parse from ..common import * def douban_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - - titles = re.findall(r'"name":"([^"]*)"', html) - real_urls = [re.sub('\\\\/', '/', i) for i in re.findall(r'"rawUrl":"([^"]*)"', html)] - - for i in range(len(titles)): - title = titles[i] - real_url = real_urls[i] + if 'subject' in url: + titles = re.findall(r'data-title="([^"]*)">', html) + song_id = re.findall(r'
  • Date: Sun, 17 Nov 2013 11:00:48 +0800 Subject: [PATCH 054/147] Fix title extraction for the new bilibili web page --- src/you_get/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index 322b60fc..df7640bc 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -79,7 +79,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://(www.bilibili.tv|bilibili.kankanews.com|bilibili.smgbb.cn)/video/av(\d+)', url) html = get_html(url) - title = r1(r'

    ([^<>]+)

    ', html) + title = r1(r']*>([^<>]+)', html) title = unescape_html(title) title = escape_file_path(title) From ed12df2eb5df464cc01b44ab6b93227f441a5072 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 21 Nov 2013 07:44:37 +0100 Subject: [PATCH 055/147] trim title length <=82, fix #273 --- src/you_get/util/fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index 09aa48a9..36e0b29d 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -14,7 +14,6 @@ def legitimize(text, os=platform.system()): if os == 'Windows': # Windows (non-POSIX namespace) - text = text[:255] # Trim to 255 Unicode characters long text = text.translate({ # Reserved in Windows VFAT and NTFS ord(':'): '-', @@ -42,4 +41,5 @@ def legitimize(text, os=platform.system()): if text.startswith("."): text = text[1:] + text = text[:82] # Trim to 82 Unicode characters long return text From 0cf72772d61482602eb0470d448d2255bd2d980d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BE=9D=E4=BA=91?= Date: Sat, 30 Nov 2013 22:43:00 +0800 Subject: [PATCH 056/147] setup.py: open with encoding 'utf-8' --- setup.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/setup.py b/setup.py index 6564d33d..d4f1be39 100755 --- a/setup.py +++ b/setup.py @@ -7,36 +7,36 @@ PROJ_METADATA = '%s.json' % PROJ_NAME import os, json, imp here = os.path.abspath(os.path.dirname(__file__)) -proj_info = json.loads(open(os.path.join(here, PROJ_METADATA)).read()) -README = open(os.path.join(here, 'README.txt')).read() -CHANGELOG = open(os.path.join(here, 'CHANGELOG.txt')).read() +proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) +README = open(os.path.join(here, 'README.txt'), encoding='utf-8').read() +CHANGELOG = open(os.path.join(here, 'CHANGELOG.txt'), encoding='utf-8').read() VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ from setuptools import setup, find_packages setup( name = proj_info['name'], version = VERSION, - + author = proj_info['author'], author_email = proj_info['author_email'], url = proj_info['url'], license = proj_info['license'], - + description = proj_info['description'], keywords = proj_info['keywords'], - + long_description = README + '\n\n' + CHANGELOG, - + packages = find_packages('src'), package_dir = {'' : 'src'}, - + test_suite = 'tests', - + platforms = 'any', zip_safe = False, include_package_data = True, - + classifiers = proj_info['classifiers'], - + entry_points = {'console_scripts': proj_info['console_scripts']} ) From 34a9830e71c6ecc853bfa85a2c0044b4529c5b1e Mon Sep 17 00:00:00 2001 From: Star Brilliant Date: Sat, 7 Dec 2013 12:28:38 +0800 Subject: [PATCH 057/147] Enable downloading Acfun locked comments --- src/you_get/extractor/acfun.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 88e1a7d0..6903a617 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -15,6 +15,10 @@ def get_srt_json(id): url = 'http://comment.acfun.tv/%s.json' % id return get_html(url) +def get_srt_lock_json(id): + url = 'http://comment.acfun.tv/%s_lock.json' % id + return get_html(url) + def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id)) t = info['vtype'] @@ -35,6 +39,10 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ cmt = get_srt_json(vid) with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: x.write(cmt) + print('Downloading %s ...' % (title + '.cmt_lock.json')) + cmt = get_srt_lock_json(vid) + with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: + x.write(cmt) def acfun_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url) From 17a07a507d17dcdc37dd6f899a82493880237cd2 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 8 Dec 2013 05:54:32 +0100 Subject: [PATCH 058/147] Vine: fixed --- src/you_get/extractor/vine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/vine.py b/src/you_get/extractor/vine.py index 5ff629b7..c8ffcbc8 100644 --- a/src/you_get/extractor/vine.py +++ b/src/you_get/extractor/vine.py @@ -8,7 +8,7 @@ def vine_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) title = r1(r' Date: Tue, 10 Dec 2013 00:11:40 +0100 Subject: [PATCH 059/147] Google+: fix #276 --- src/you_get/extractor/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/google.py b/src/you_get/extractor/google.py index 0193db2f..3f8fcca3 100644 --- a/src/you_get/extractor/google.py +++ b/src/you_get/extractor/google.py @@ -50,7 +50,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False): if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url): html = get_html(url) - url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html) + url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html) title = r1(r'([^<\n]+)', html) else: title = None From 26463820c86ea419cc1c10a39b50ea9973a8b30d Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Mon, 16 Dec 2013 12:08:38 +0100 Subject: [PATCH 060/147] Acfun: mute the exception if .cmt.json not available --- src/you_get/extractor/acfun.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 6903a617..acd050f1 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -35,14 +35,17 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ raise NotImplementedError(t) if not info_only: - print('Downloading %s ...' % (title + '.cmt.json')) - cmt = get_srt_json(vid) - with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: - x.write(cmt) - print('Downloading %s ...' % (title + '.cmt_lock.json')) - cmt = get_srt_lock_json(vid) - with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: - x.write(cmt) + try: + print('Downloading %s ...' % (title + '.cmt.json')) + cmt = get_srt_json(vid) + with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: + x.write(cmt) + print('Downloading %s ...' % (title + '.cmt_lock.json')) + cmt = get_srt_lock_json(vid) + with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: + x.write(cmt) + except: + pass def acfun_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url) From b71bf45bd2ac814c9300fb62d4445e92383aee93 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 19 Dec 2013 23:47:52 +0100 Subject: [PATCH 061/147] YouTube: fix #279 --- src/you_get/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index f266caa3..513c52d8 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -77,6 +77,8 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') html5player = ytplayer_config['assets']['js'] + if html5player[0:2] == '//': + html5player = 'http:' + html5player streams = { parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream) From 0612e97877f977b5dc4919486b9fe57c941ab717 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 20 Dec 2013 03:04:43 +0100 Subject: [PATCH 062/147] Youku: fix #267 --- src/you_get/extractor/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/youku.py b/src/you_get/extractor/youku.py index 4abedc97..529320dc 100644 --- a/src/you_get/extractor/youku.py +++ b/src/you_get/extractor/youku.py @@ -42,7 +42,7 @@ def parse_video_title(url, page): # if we are playing a viedo from play list, the meta title might be incorrect title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^<>]*)'], page) else: - title = r1_of([r'
    [^<]', r'[^<]', r'([^-]+)—在线播放.*', r' Date: Fri, 20 Dec 2013 03:11:18 +0100 Subject: [PATCH 063/147] version 0.3.25 --- CHANGELOG.txt | 7 +++++++ src/you_get/version.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index d0409f10..944e8998 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,13 @@ Changelog ========= +0.3.25 +------ + +*Date: 2013-12-20* + +* Bug fix release + 0.3.24 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 4e983583..3583ff8b 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.24' -__date__ = '2013-10-30' +__version__ = '0.3.25' +__date__ = '2013-12-20' From 81ce94e348f50d339cf4919a3e9a962defaa6fd0 Mon Sep 17 00:00:00 2001 From: Star Brilliant Date: Sat, 21 Dec 2013 00:18:44 +0800 Subject: [PATCH 064/147] Change sina API, fix #280 --- src/you_get/extractor/sina.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/sina.py b/src/you_get/extractor/sina.py index 33cc0c7c..6f6583b0 100644 --- a/src/you_get/extractor/sina.py +++ b/src/you_get/extractor/sina.py @@ -5,7 +5,8 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] from ..common import * def video_info(id): - xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, decoded=True) + xml = get_content('http://interface.bilibili.tv/playurl?vid=%s' % id, headers=fake_headers, decoded=True) + #xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, headers=fake_headers, decoded=True) urls = re.findall(r'(?:)?', xml) name = match1(xml, r'(?:)?') vstr = match1(xml, r'(?:)?') From 9379fafa36e10434a6c85ba3eaa01629272acd63 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 20 Dec 2013 23:47:39 +0100 Subject: [PATCH 065/147] Sina: fix #246 --- src/you_get/extractor/sina.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/sina.py b/src/you_get/extractor/sina.py index 6f6583b0..68b73229 100644 --- a/src/you_get/extractor/sina.py +++ b/src/you_get/extractor/sina.py @@ -54,7 +54,8 @@ def sina_download(url, output_dir='.', merge=True, info_only=False): vid = vids[-1] if vid: - sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) + title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') + sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) else: vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"') title = match1(video_page, r'title\s*:\s*"([^"]+)"') From f66af0d56c4d8e4e08b39d40d119ffdb276b97a6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 21 Dec 2013 04:00:07 +0100 Subject: [PATCH 066/147] YouTube: fix #282 --- src/you_get/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 513c52d8..1efe6b67 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -122,7 +122,8 @@ def youtube_download(url, output_dir='.', merge=True, info_only=False): parse_query_param(url, 'v') or \ parse_query_param(parse_query_param(url, 'u'), 'v') if id is None: - list_id = parse_query_param(url, 'list') + list_id = parse_query_param(url, 'list') or \ + parse_query_param(url, 'p') assert id or list_id if id: From da72974fa27f614e14cd86534cfbdda5cea1d1d9 Mon Sep 17 00:00:00 2001 From: Ming Dai Date: Tue, 31 Dec 2013 21:49:20 +0800 Subject: [PATCH 067/147] fix 404 of "%s/%s.html" in qq.py --- src/you_get/extractor/qq.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/you_get/extractor/qq.py b/src/you_get/extractor/qq.py index b59c68bc..c8a1daa7 100644 --- a/src/you_get/extractor/qq.py +++ b/src/you_get/extractor/qq.py @@ -6,9 +6,9 @@ from ..common import * def qq_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): url = 'http://vsrc.store.qq.com/%s.flv' % id - + _, _, size = url_info(url) - + print_info(site_info, title, 'flv', size) if not info_only: download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) @@ -17,31 +17,31 @@ def qq_download(url, output_dir = '.', merge = True, info_only = False): if re.match(r'http://v.qq.com/([^\?]+)\?vid', url): aid = r1(r'(.*)\.html', url) vid = r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url) - url = "%s/%s.html" % (aid, vid) - + url = 'http://sns.video.qq.com/tvideo/fcgi-bin/video?vid=%s' % vid + if re.match(r'http://y.qq.com/([^\?]+)\?vid', url): vid = r1(r'http://y.qq.com/[^\?]+\?vid=(\w+)', url) - + url = "http://v.qq.com/page/%s.html" % vid - + r_url = r1(r'(.+?)', r'title:"([^"]+)"')[0].strip() assert title title = unescape_html(title) title = escape_file_path(title) - - id = r1(r'vid:"([^"]+)"', html) - + + id = vid + qq_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) site_info = "QQ.com" From b5a1a845a535230238bf09543aa95253cc7175c4 Mon Sep 17 00:00:00 2001 From: Ming Dai Date: Wed, 1 Jan 2014 14:25:44 +0800 Subject: [PATCH 068/147] Add support to play video in players --- src/you_get/common.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 25df1090..8a570f62 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -15,6 +15,7 @@ from .util import log, legitimize, sogou_proxy_server dry_run = False force = False +player = None sogou_proxy = None sogou_env = None @@ -78,6 +79,11 @@ def match1(text, *patterns): ret.append(match.group(1)) return ret +def launch_player(player, urls): + import subprocess + import shlex + subprocess.call(shlex.split(player) + list(urls)) + def parse_query_param(url, param): """Parses the query string of a URL and returns the value of a parameter. @@ -504,6 +510,10 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, print('Real URLs:\n', urls, '\n') return + if player: + launch_player(player, urls) + return + if not total_size: try: total_size = urls_size(urls) @@ -587,6 +597,10 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer print('Real URLs:\n', urls, '\n') return + if player: + launch_player(player, urls) + return + assert ext in ('ts') title = legitimize(title) @@ -792,6 +806,7 @@ def script_main(script_name, download, download_playlist = None): -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. -o | --output-dir Set the output directory for downloaded videos. + -p | --player Directly play the video with PLAYER like vlc/smplayer -x | --http-proxy Use specific HTTP proxy for downloading. --no-proxy Don't use any proxy. (ignore $http_proxy) -S | --sogou Use a Sogou proxy server for downloading. @@ -799,8 +814,8 @@ def script_main(script_name, download, download_playlist = None): --debug Show traceback on KeyboardInterrupt. ''' - short_opts = 'VhfiunSo:x:' - opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] + short_opts = 'VhfiunSo:p:x:' + opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -814,6 +829,7 @@ def script_main(script_name, download, download_playlist = None): global force global dry_run + global player global sogou_proxy global sogou_env @@ -841,19 +857,21 @@ def script_main(script_name, download, download_playlist = None): playlist = True elif o in ('-n', '--no-merge'): merge = False - elif o in ('--no-proxy'): + elif o in ('--no-proxy',): proxy = '' - elif o in ('--debug'): + elif o in ('--debug',): traceback = True elif o in ('-o', '--output-dir'): output_dir = a + elif o in ('-p', '--player'): + player = a elif o in ('-x', '--http-proxy'): proxy = a elif o in ('-S', '--sogou'): sogou_proxy = ("0.0.0.0", 0) - elif o in ('--sogou-proxy'): + elif o in ('--sogou-proxy',): sogou_proxy = parse_host(a) - elif o in ('--sogou-env'): + elif o in ('--sogou-env',): sogou_env = a else: log.e("try 'you-get --help' for more options") From 00196dc1a11b8eefe7274c1c58cc7cf43875afcf Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 1 Jan 2014 23:06:28 +0100 Subject: [PATCH 069/147] Sohu: remove some tests (video deleted) --- tests/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test.py b/tests/test.py index 5f7968eb..92ec35dc 100644 --- a/tests/test.py +++ b/tests/test.py @@ -28,8 +28,7 @@ class YouGetTests(unittest.TestCase): "http://tv.sohu.com/20120522/n343785589.shtml", "http://tv.sohu.com/20130103/n362246415.shtml", "http://tv.sohu.com/20130103/n362251239.shtml", - "http://my.tv.sohu.com/us/4559763/51981774.shtml", - "http://my.tv.sohu.com/us/47390785/61594248.shtml", + "http://my.tv.sohu.com/us/4559763/51981774.shtml" ]) def test_ted(self): From 47118c83a33994d57f1959a080661890254f0394 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 1 Jan 2014 23:11:14 +0100 Subject: [PATCH 070/147] Mixcloud: fixed --- src/you_get/extractor/mixcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor/mixcloud.py b/src/you_get/extractor/mixcloud.py index 0261f081..4ad4a098 100644 --- a/src/you_get/extractor/mixcloud.py +++ b/src/you_get/extractor/mixcloud.py @@ -9,7 +9,7 @@ def mixcloud_download(url, output_dir = '.', merge = True, info_only = False): title = r1(r' Date: Sun, 5 Jan 2014 02:29:50 +0800 Subject: [PATCH 071/147] fix decode bug --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 8a570f62..257e31c5 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -192,7 +192,7 @@ def get_decoded_html(url, faker = False): data = response.data charset = r1(r'charset=([\w-]+)', response.headers['content-type']) if charset: - return data.decode(charset) + return data.decode(charset, 'ignore') else: return data From 10c2f33fa6685fda2daef1ac51f959a25a4c2ea2 Mon Sep 17 00:00:00 2001 From: liuerfire Date: Sun, 5 Jan 2014 02:59:24 +0800 Subject: [PATCH 072/147] update netease appsrc --- src/you_get/extractor/netease.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor/netease.py b/src/you_get/extractor/netease.py index 863689f3..04a0c15c 100644 --- a/src/you_get/extractor/netease.py +++ b/src/you_get/extractor/netease.py @@ -6,8 +6,9 @@ from ..common import * def netease_download(url, output_dir = '.', merge = True, info_only = False): html = get_decoded_html(url) - + title = r1('movieDescription=\'([^\']+)\'', html) or r1('(.+)', html) + if title[0] == ' ': title = title[1:] @@ -27,7 +28,7 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False): ext = 'flv' else: - url = r1(r'["\'](.+)-list.m3u8["\']', html) + ".mp4" + url = r1(r'["\'](.+)-list.m3u8["\']', html) or r1(r'["\'](.+).m3u8["\']', html) + ".mp4" _, _, size = url_info(url) ext = 'mp4' From ef89a2abf0c4f603663b138c276462050ad17ab6 Mon Sep 17 00:00:00 2001 From: liuerfire Date: Sun, 5 Jan 2014 13:31:45 +0800 Subject: [PATCH 073/147] fix the precedence error --- src/you_get/extractor/netease.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/netease.py b/src/you_get/extractor/netease.py index 04a0c15c..1321ba0f 100644 --- a/src/you_get/extractor/netease.py +++ b/src/you_get/extractor/netease.py @@ -28,7 +28,7 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False): ext = 'flv' else: - url = r1(r'["\'](.+)-list.m3u8["\']', html) or r1(r'["\'](.+).m3u8["\']', html) + ".mp4" + url = (r1(r'["\'](.+)-list.m3u8["\']', html) or r1(r'["\'](.+).m3u8["\']', html)) + ".mp4" _, _, size = url_info(url) ext = 'mp4' From fe90e4e8578192785631ba464ec46dc8e26397c1 Mon Sep 17 00:00:00 2001 From: Ming Dai Date: Sun, 5 Jan 2014 17:24:21 +0800 Subject: [PATCH 074/147] fix failure on downloading other patterns of qq.com --- src/you_get/extractor/qq.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/qq.py b/src/you_get/extractor/qq.py index c8a1daa7..3ca87a58 100644 --- a/src/you_get/extractor/qq.py +++ b/src/you_get/extractor/qq.py @@ -33,6 +33,11 @@ def qq_download(url, output_dir = '.', merge = True, info_only = False): vid = r1(r'http://static.video.qq.com/.*vid=(\w+)', url) url = "http://v.qq.com/page/%s.html" % vid + if re.match(r'http://v.qq.com/cover/.*\.html', url): + html = get_html(url) + vid = r1(r'vid:"([^"]+)"', html) + url = 'http://sns.video.qq.com/tvideo/fcgi-bin/video?vid=%s' % vid + html = get_html(url) title = match1(html, r'(.+?)', r'title:"([^"]+)"')[0].strip() @@ -40,7 +45,10 @@ def qq_download(url, output_dir = '.', merge = True, info_only = False): title = unescape_html(title) title = escape_file_path(title) - id = vid + try: + id = vid + except: + id = r1(r'vid:"([^"]+)"', html) qq_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) From 49338a523b708ad5264fd5088ca145b3c3a09368 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Jan 2014 04:58:57 +0100 Subject: [PATCH 075/147] Youku: add support for 1080p (hd3) --- src/you_get/extractor/youku.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractor/youku.py b/src/you_get/extractor/youku.py index 529320dc..693d6ca4 100644 --- a/src/you_get/extractor/youku.py +++ b/src/you_get/extractor/youku.py @@ -87,14 +87,14 @@ def find_video(info, stream_type = None): segs = info['data'][0]['segs'] types = segs.keys() if not stream_type: - for x in ['hd2', 'mp4', 'flv']: + for x in ['hd3', 'hd2', 'mp4', 'flv']: if x in types: stream_type = x break else: raise NotImplementedError() - assert stream_type in ('hd2', 'mp4', 'flv') - file_type = {'hd2': 'flv', 'mp4': 'mp4', 'flv': 'flv'}[stream_type] + assert stream_type in ('hd3', 'hd2', 'mp4', 'flv') + file_type = {'hd3': 'flv', 'hd2': 'flv', 'mp4': 'mp4', 'flv': 'flv'}[stream_type] seed = info['data'][0]['seed'] source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890") From a1f635df9462464985e63928b32324079816c4e9 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Jan 2014 05:03:59 +0100 Subject: [PATCH 076/147] Sohu: remove some error-prone test case --- tests/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test.py b/tests/test.py index 92ec35dc..34a6d8cc 100644 --- a/tests/test.py +++ b/tests/test.py @@ -27,8 +27,7 @@ class YouGetTests(unittest.TestCase): test_urls([ "http://tv.sohu.com/20120522/n343785589.shtml", "http://tv.sohu.com/20130103/n362246415.shtml", - "http://tv.sohu.com/20130103/n362251239.shtml", - "http://my.tv.sohu.com/us/4559763/51981774.shtml" + "http://tv.sohu.com/20130103/n362251239.shtml" ]) def test_ted(self): From 127019d43770e4a270e4c4967ad8a5f8e959fc52 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 8 Jan 2014 08:21:32 +0100 Subject: [PATCH 077/147] Vine: fixed --- src/you_get/extractor/vine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/vine.py b/src/you_get/extractor/vine.py index c8ffcbc8..0bd25700 100644 --- a/src/you_get/extractor/vine.py +++ b/src/you_get/extractor/vine.py @@ -8,7 +8,9 @@ def vine_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) title = r1(r' Date: Tue, 21 Jan 2014 22:29:58 +0100 Subject: [PATCH 078/147] Mixcloud: fixed --- src/you_get/extractor/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/mixcloud.py b/src/you_get/extractor/mixcloud.py index 4ad4a098..d6159e47 100644 --- a/src/you_get/extractor/mixcloud.py +++ b/src/you_get/extractor/mixcloud.py @@ -7,7 +7,7 @@ from ..common import * def mixcloud_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) title = r1(r' Date: Sat, 18 Jan 2014 04:58:22 +0800 Subject: [PATCH 079/147] add letv --- src/you_get/extractor/__init__.py | 1 + src/you_get/extractor/__main__.py | 1 + src/you_get/extractor/letv.py | 39 +++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 src/you_get/extractor/letv.py diff --git a/src/you_get/extractor/__init__.py b/src/you_get/extractor/__init__.py index 874824fe..4988e35c 100644 --- a/src/you_get/extractor/__init__.py +++ b/src/you_get/extractor/__init__.py @@ -20,6 +20,7 @@ from .iqiyi import * from .joy import * from .jpopsuki import * from .ku6 import * +from .letv import * from .miomio import * from .mixcloud import * from .netease import * diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index f8d9ecea..30bfa41c 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -40,6 +40,7 @@ def url_to_module(url): 'jpopsuki': jpopsuki, 'kankanews': bilibili, 'ku6': ku6, + 'letv': letv, 'miomio': miomio, 'mixcloud': mixcloud, 'nicovideo': nicovideo, diff --git a/src/you_get/extractor/letv.py b/src/you_get/extractor/letv.py new file mode 100644 index 00000000..fe371519 --- /dev/null +++ b/src/you_get/extractor/letv.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +__all__ = ['letv_download'] + +import json +import xml.etree.ElementTree as ET +from ..common import * + +def video_info(vid): + x = get_content("http://www.letv.com/v_xml/%s.xml" % vid) + xml_obj = ET.fromstring(x) + info = json.loads(xml_obj.find("playurl").text) + title = info.get('title') + urls = info.get('dispatch') + for key in urls.keys(): + url = urls[key][0] + break + return url, title + +def letv_download_by_vid(vid, output_dir='.', merge=True, info_only=False): + url, title = video_info(vid) + _, _, size = url_info(url) + ext = 'flv' + print_info(site_info, title, ext, size) + if not info_only: + download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) + +def letv_download(url, output_dir='.', merge=True, info_only=False): + if re.match(r'http://www.letv.com/ptv/vplay/(\d+).html', url): + vid = match1(url, r'http://www.letv.com/ptv/vplay/(\d+).html') + else: + html = get_content(url) + vid = match1(html, r'vid="(\d+)"') + letv_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) + + +site_info = "letv.com" +download = letv_download +download_playlist = playlist_not_supported('letv') From 36817442974bb0bdb9e0bf954a50d6a2162906b2 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 21 Jan 2014 22:55:58 +0100 Subject: [PATCH 080/147] Update README to reflect merge of #289, fix #98 --- README.md | 1 + README.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index d8a89bae..c258e6a7 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A * iQIYI (爱奇艺) * Joy.cn (激动网) * Ku6 (酷6网) +* LeTV (乐视网) * MioMio * NetEase (网易视频) * PPTV diff --git a/README.txt b/README.txt index 02a9408e..e77db5de 100644 --- a/README.txt +++ b/README.txt @@ -48,6 +48,7 @@ Supported Sites (As of Now) * iQIYI (爱奇艺) http://www.iqiyi.com * Joy.cn (激动网) http://www.joy.cn * Ku6 (酷6网) http://www.ku6.com +* LeTV (乐视网) http://www.letv.com * MioMio http://www.miomio.tv * NetEase (网易视频) http://v.163.com * PPTV http://www.pptv.com From 7b15064bd6f52afccf8148dcb5490651c2cb9c7e Mon Sep 17 00:00:00 2001 From: darren Date: Thu, 6 Feb 2014 04:03:51 +0000 Subject: [PATCH 081/147] Handle $ symbol in js function name --- src/you_get/extractor/youtube.py | 35 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 1efe6b67..26ffa60e 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -35,6 +35,7 @@ yt_codecs = [ def decipher(js, s): def tr_js(code): code = re.sub(r'function', r'def', code) + code = re.sub(r'\$', '_', code) code = re.sub(r'\{', r':\n\t', code) code = re.sub(r'\}', r'\n', code) code = re.sub(r'var\s+', r'', code) @@ -44,15 +45,17 @@ def decipher(js, s): code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code) code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code) return code - + f1 = match1(js, r'\w+\.sig\|\|(\w+)\(\w+\.\w+\)') f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1) code = tr_js(f1def) - f2 = match1(f1def, r'(\w+)\(\w+,\d+\)') + f2 = match1(f1def, r'([$\w]+)\(\w+,\d+\)') if f2 is not None: - f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2) + f2e = re.escape(f2) + f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2e) + f2 = re.sub(r'\$', r'_', f2) code = code + 'global %s\n' % f2 + tr_js(f2def) - + code = code + 'sig=%s(s)' % f1 exec(code, globals(), locals()) return locals()['sig'] @@ -60,37 +63,37 @@ def decipher(js, s): def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False): """Downloads a YouTube video by its unique id. """ - + raw_video_info = get_content('http://www.youtube.com/get_video_info?video_id=%s' % id) video_info = parse.parse_qs(raw_video_info) - + if video_info['status'] == ['ok'] and ('use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']): title = parse.unquote_plus(video_info['title'][0]) stream_list = parse.parse_qs(raw_video_info)['url_encoded_fmt_stream_map'][0].split(',') - + else: # Parse video page when video_info is not usable. video_page = get_content('http://www.youtube.com/watch?v=%s' % id) ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+);')) - + title = ytplayer_config['args']['title'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') - + html5player = ytplayer_config['assets']['js'] if html5player[0:2] == '//': html5player = 'http:' + html5player - + streams = { parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream) for stream in stream_list } - + for codec in yt_codecs: itag = str(codec['itag']) if itag in streams: download_stream = streams[itag] break - + url = download_stream['url'][0] if 'sig' in download_stream: sig = download_stream['sig'][0] @@ -98,9 +101,9 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only js = get_content(html5player) sig = decipher(js, download_stream['s'][0]) url = '%s&signature=%s' % (url, sig) - + type, ext, size = url_info(url) - + print_info(site_info, title, type, size) if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) @@ -117,7 +120,7 @@ def youtube_list_download_by_id(list_id, title=None, output_dir='.', merge=True, def youtube_download(url, output_dir='.', merge=True, info_only=False): """Downloads YouTube videos by URL. """ - + id = match1(url, r'youtu.be/([^/]+)') or \ parse_query_param(url, 'v') or \ parse_query_param(parse_query_param(url, 'u'), 'v') @@ -125,7 +128,7 @@ def youtube_download(url, output_dir='.', merge=True, info_only=False): list_id = parse_query_param(url, 'list') or \ parse_query_param(url, 'p') assert id or list_id - + if id: youtube_download_by_id(id, title=None, output_dir=output_dir, merge=merge, info_only=info_only) else: From 0f38ed615d848869bb48cb5abae83f017796fe00 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 8 Feb 2014 04:13:29 +0100 Subject: [PATCH 082/147] Update README to reflect merge of #286 --- README.md | 1 + README.txt | 1 + src/you_get/common.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c258e6a7..52763e48 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,7 @@ For a complete list of all available options, see: -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. -o | --output-dir Set the output directory for downloaded videos. + -p | --player Directly play the video with PLAYER like vlc/smplayer. -x | --http-proxy Use specific HTTP proxy for downloading. --no-proxy Don't use any proxy. (ignore $http_proxy) -S | --sogou Use a Sogou proxy server for downloading. diff --git a/README.txt b/README.txt index e77db5de..5e259da9 100644 --- a/README.txt +++ b/README.txt @@ -180,6 +180,7 @@ For a complete list of all available options, see:: -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. -o | --output-dir Set the output directory for downloaded videos. + -p | --player Directly play the video with PLAYER like vlc/smplayer. -x | --http-proxy Use specific HTTP proxy for downloading. --no-proxy Don't use any proxy. (ignore $http_proxy) -S | --sogou Use a Sogou proxy server for downloading. diff --git a/src/you_get/common.py b/src/you_get/common.py index 257e31c5..a440b580 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -806,7 +806,7 @@ def script_main(script_name, download, download_playlist = None): -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. -o | --output-dir Set the output directory for downloaded videos. - -p | --player Directly play the video with PLAYER like vlc/smplayer + -p | --player Directly play the video with PLAYER like vlc/smplayer. -x | --http-proxy Use specific HTTP proxy for downloading. --no-proxy Don't use any proxy. (ignore $http_proxy) -S | --sogou Use a Sogou proxy server for downloading. From 59e505b5bceaca6f223afbb55a3c79feb1eba144 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 8 Feb 2014 04:38:37 +0100 Subject: [PATCH 083/147] version 0.3.26 --- CHANGELOG.txt | 17 +++++++++++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 944e8998..419a5784 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,23 @@ Changelog ========= +0.3.26 +------ + +*Date: 2014-02-08* + +* New features: + - Play video in players (#286) + - LeTV support (#289) + - Youku 1080P support +* Bug fixes: + - YouTube (#282, #292) + - Sina (#246, #280) + - Mixcloud + - NetEase + - QQ + - Vine + 0.3.25 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 3583ff8b..70d6adf1 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.25' -__date__ = '2013-12-20' +__version__ = '0.3.26' +__date__ = '2014-02-08' From 4ab7c50ec8a30f47cd9ad6869953cee45db1276b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 10 Feb 2014 02:16:03 +0100 Subject: [PATCH 084/147] Dailymotion: fix user page URL --- src/you_get/extractor/dailymotion.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractor/dailymotion.py b/src/you_get/extractor/dailymotion.py index 99d586c8..8e8851aa 100644 --- a/src/you_get/extractor/dailymotion.py +++ b/src/you_get/extractor/dailymotion.py @@ -7,22 +7,22 @@ from ..common import * def dailymotion_download(url, output_dir = '.', merge = True, info_only = False): """Downloads Dailymotion videos by URL. """ - - id = match1(url, r'/video/([^\?]+)') + + id = match1(url, r'/video/([^\?]+)') or match1(url, r'video=([^\?]+)') embed_url = 'http://www.dailymotion.com/embed/video/%s' % id html = get_content(embed_url) - + info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n')) - + title = info['title'] - + for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']: real_url = info[quality] if real_url: break - + type, ext, size = url_info(real_url) - + print_info(site_info, title, type, size) if not info_only: download_urls([real_url], title, ext, size, output_dir, merge = merge) From 9fbf298e89e5c0ce18cec0ddc4433512f1c092dc Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 02:20:06 +0100 Subject: [PATCH 085/147] AcFun: fix #295 --- src/you_get/extractor/acfun.py | 52 ++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index acd050f1..f01bb19d 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -19,29 +19,30 @@ def get_srt_lock_json(id): url = 'http://comment.acfun.tv/%s_lock.json' % id return get_html(url) -def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id)) - t = info['vtype'] - vid = info['vid'] - if t == 'sina': - sina_download_by_vid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only) - elif t == 'youku': - youku_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only) - elif t == 'tudou': - tudou_download_by_iid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only) - elif t == 'qq': - qq_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only) +def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): + info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid)) + sourceType = info['sourceType'] + sourceId = info['sourceId'] + danmakuId = info['danmakuId'] + if sourceType == 'sina': + sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + elif sourceType == 'youku': + youku_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + elif sourceType == 'tudou': + tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + elif sourceType == 'qq': + qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) else: raise NotImplementedError(t) - + if not info_only: try: print('Downloading %s ...' % (title + '.cmt.json')) - cmt = get_srt_json(vid) + cmt = get_srt_json(danmakuId) with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: x.write(cmt) print('Downloading %s ...' % (title + '.cmt_lock.json')) - cmt = get_srt_lock_json(vid) + cmt = get_srt_lock_json(danmakuId) with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: x.write(cmt) except: @@ -50,19 +51,22 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ def acfun_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url) html = get_html(url) - + title = r1(r'

    ]*>([^<>]+)<', html) - assert title title = unescape_html(title) title = escape_file_path(title) - title = title.replace(' - AcFun.tv', '') - - id = r1(r"\[Video\](\d+)\[/Video\]", html) or r1(r"\[video\](\d+)\[/video\]", html) - if not id: - id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html) - sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + assert title + + videos = re.findall("data-vid=\"(\d+)\" href=\"[^\"]+\" title=\"([^\"]+)\"", html) + if videos is not None: + for video in videos: + p_vid = video[0] + p_title = title + " - " + video[1] + acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only) else: - acfun_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + # Useless - to be removed? + id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html) + sina_download_by_vid(id, title, output_dir=output_dir, merge=merge, info_only=info_only) site_info = "AcFun.tv" download = acfun_download From 13f785e42d742ee13fcef2516577bf1569639690 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 02:49:53 +0100 Subject: [PATCH 086/147] YouTube: update yt_codecs --- src/you_get/extractor/youtube.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 26ffa60e..18cdd467 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -5,17 +5,17 @@ __all__ = ['youtube_download', 'youtube_download_by_id'] from ..common import * # YouTube media encoding options, in descending quality order. -# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013. +# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 2/14/2014. yt_codecs = [ {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + {'itag': 85, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '3-4', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, - {'itag': 102, 'container': '', 'video_resolution': '', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, - {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': '', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': '', 'audio_bitrate': ''}, - {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, - {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'}, - {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'AVC', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, - {'itag': 85, 'container': 'MP4', 'video_resolution': '520p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'}, + {'itag': 102, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, + {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, + {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, @@ -28,7 +28,7 @@ yt_codecs = [ {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''}, {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'}, - {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.17', 'audio_encoding': 'AAC', 'audio_bitrate': '38'}, + {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.175', 'audio_encoding': 'AAC', 'audio_bitrate': '36'}, {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'}, ] From 869128c8c7f1a8d4de0f592ded338d0833f8d453 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 09:14:04 +0100 Subject: [PATCH 087/147] Niconico: workaround for TLS hang bug, fix #296 --- src/you_get/extractor/nicovideo.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractor/nicovideo.py b/src/you_get/extractor/nicovideo.py index 7d384f31..f99a54b8 100644 --- a/src/you_get/extractor/nicovideo.py +++ b/src/you_get/extractor/nicovideo.py @@ -6,12 +6,17 @@ from ..common import * def nicovideo_login(user, password): data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In" - response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers = fake_headers, data = data.encode('utf-8'))) + response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8'))) return response.headers -def nicovideo_download(url, output_dir = '.', merge = True, info_only = False): - request.install_opener(request.build_opener(request.HTTPCookieProcessor())) - +def nicovideo_download(url, output_dir='.', merge=True, info_only=False): + import ssl + ssl_context = request.HTTPSHandler( +context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) + cookie_handler = request.HTTPCookieProcessor() + opener = request.build_opener(ssl_context, cookie_handler) + request.install_opener(opener) + import netrc, getpass info = netrc.netrc().authenticators('nicovideo') if info is None: @@ -21,15 +26,15 @@ def nicovideo_download(url, output_dir = '.', merge = True, info_only = False): user, password = info[0], info[2] print("Logging in...") nicovideo_login(user, password) - + html = get_html(url) # necessary! title = unicodize(r1(r'([^<]+)', html)) - + api_html = get_html('http://www.nicovideo.jp/api/getflv?v=%s' % url.split('/')[-1]) real_url = parse.unquote(r1(r'url=([^&]+)&', api_html)) - + type, ext, size = url_info(real_url) - + print_info(site_info, title, type, size) if not info_only: download_urls([real_url], title, ext, size, output_dir, merge = merge) From 1a0ed7db44bc5fb23357ba6770feecbd908029bb Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 09:22:31 +0100 Subject: [PATCH 088/147] version 0.3.27 --- CHANGELOG.txt | 7 +++++++ src/you_get/version.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 419a5784..f43f1300 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,13 @@ Changelog ========= +0.3.27 +------ + +*Date: 2014-02-14* + +* Bug fix release + 0.3.26 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 70d6adf1..73a9663e 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.26' -__date__ = '2014-02-08' +__version__ = '0.3.27' +__date__ = '2014-02-14' From da4506c4e79c275630a4948d2cba2bac63e70b01 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 21:09:57 +0100 Subject: [PATCH 089/147] fix url_to_module --- src/you_get/extractor/__main__.py | 26 ++++++++++---------------- tests/test.py | 14 +++++++------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index 30bfa41c..b2974021 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -8,12 +8,12 @@ def url_to_module(url): video_host = r1(r'http://([^/]+)/', url) video_url = r1(r'http://[^/]+(.*)', url) assert video_host and video_url, 'invalid url: ' + url - + if video_host.endswith('.com.cn'): video_host = video_host[:-3] domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host assert domain, 'unsupported url: ' + url - + k = r1(r'([^.]+)', domain) downloads = { '163': netease, @@ -66,7 +66,7 @@ def url_to_module(url): #TODO } if k in downloads: - return downloads[k] + return downloads[k], url else: import http.client conn = http.client.HTTPConnection(video_host) @@ -76,21 +76,15 @@ def url_to_module(url): if location is None: raise NotImplementedError(url) else: - return url_to_module(location), location + return url_to_module(location) -def any_download(url, output_dir = '.', merge = True, info_only = False): - try: - m, url = url_to_module(url) - except: - m = url_to_module(url) - m.download(url, output_dir = output_dir, merge = merge, info_only = info_only) +def any_download(url, output_dir='.', merge=True, info_only=False): + m, url = url_to_module(url) + m.download(url, output_dir=output_dir, merge=merge, info_only=info_only) -def any_download_playlist(url, output_dir = '.', merge = True, info_only = False): - try: - m, url = url_to_module(url) - except: - m = url_to_module(url) - m.download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only) +def any_download_playlist(url, output_dir='.', merge=True, info_only=False): + m, url = url_to_module(url) + m.download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) def main(): script_main('you-get', any_download, any_download_playlist) diff --git a/tests/test.py b/tests/test.py index 34a6d8cc..05883d4b 100644 --- a/tests/test.py +++ b/tests/test.py @@ -8,39 +8,39 @@ from you_get.extractor.__main__ import url_to_module def test_urls(urls): for url in urls: - url_to_module(url).download(url, info_only = True) + url_to_module(url)[0].download(url, info_only = True) class YouGetTests(unittest.TestCase): - + def test_freesound(self): test_urls([ "http://www.freesound.org/people/Corsica_S/sounds/184419/", ]) - + def test_mixcloud(self): test_urls([ "http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/", "http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", ]) - + def test_sohu(self): test_urls([ "http://tv.sohu.com/20120522/n343785589.shtml", "http://tv.sohu.com/20130103/n362246415.shtml", "http://tv.sohu.com/20130103/n362251239.shtml" ]) - + def test_ted(self): test_urls([ "http://www.ted.com/talks/jennifer_lin_improvs_piano_magic.html", "http://www.ted.com/talks/derek_paravicini_and_adam_ockelford_in_the_key_of_genius.html", ]) - + def test_vimeo(self): test_urls([ "http://vimeo.com/56810854", ]) - + def test_youtube(self): test_urls([ "http://www.youtube.com/watch?v=pzKerr0JIPA", From ac5ad6a48828860d6eaa6cc7d0acbb7151bf90bd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 21:16:38 +0100 Subject: [PATCH 090/147] fix https url patterns --- src/you_get/extractor/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index b2974021..08359131 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -5,8 +5,8 @@ from ..extractor import * from ..common import * def url_to_module(url): - video_host = r1(r'http://([^/]+)/', url) - video_url = r1(r'http://[^/]+(.*)', url) + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) assert video_host and video_url, 'invalid url: ' + url if video_host.endswith('.com.cn'): From da166b1098d28bfc21e350a38ddfdf7da07bf5ee Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 21:19:43 +0100 Subject: [PATCH 091/147] Vine: improve title --- src/you_get/extractor/vine.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractor/vine.py b/src/you_get/extractor/vine.py index 0bd25700..cd3ff63f 100644 --- a/src/you_get/extractor/vine.py +++ b/src/you_get/extractor/vine.py @@ -4,15 +4,17 @@ __all__ = ['vine_download'] from ..common import * -def vine_download(url, output_dir = '.', merge = True, info_only = False): +def vine_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) - - title = r1(r' Date: Fri, 14 Feb 2014 21:56:04 +0100 Subject: [PATCH 092/147] add support: Magisto.com --- src/you_get/extractor/__init__.py | 1 + src/you_get/extractor/__main__.py | 1 + src/you_get/extractor/magisto.py | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 src/you_get/extractor/magisto.py diff --git a/src/you_get/extractor/__init__.py b/src/you_get/extractor/__init__.py index 4988e35c..5f084d4d 100644 --- a/src/you_get/extractor/__init__.py +++ b/src/you_get/extractor/__init__.py @@ -21,6 +21,7 @@ from .joy import * from .jpopsuki import * from .ku6 import * from .letv import * +from .magisto import * from .miomio import * from .mixcloud import * from .netease import * diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index 08359131..744a0646 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -41,6 +41,7 @@ def url_to_module(url): 'kankanews': bilibili, 'ku6': ku6, 'letv': letv, + 'magisto': magisto, 'miomio': miomio, 'mixcloud': mixcloud, 'nicovideo': nicovideo, diff --git a/src/you_get/extractor/magisto.py b/src/you_get/extractor/magisto.py new file mode 100644 index 00000000..d0b3c60d --- /dev/null +++ b/src/you_get/extractor/magisto.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +__all__ = ['magisto_download'] + +from ..common import * + +def magisto_download(url, output_dir='.', merge=True, info_only=False): + html = get_html(url) + + title1 = r1(r' Date: Fri, 14 Feb 2014 21:58:43 +0100 Subject: [PATCH 093/147] add test case: Magisto.com --- tests/test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test.py b/tests/test.py index 05883d4b..1b5884c6 100644 --- a/tests/test.py +++ b/tests/test.py @@ -17,6 +17,11 @@ class YouGetTests(unittest.TestCase): "http://www.freesound.org/people/Corsica_S/sounds/184419/", ]) + def test_magisto(self): + test_urls([ + "http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", + ]) + def test_mixcloud(self): test_urls([ "http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/", From b03ff088b6b1a8840e11bd449889dee27cbbb0c6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Feb 2014 22:35:40 +0100 Subject: [PATCH 094/147] Update README to reflect merge of #297 --- README.md | 35 ++++++++++++++++++----------------- README.txt | 35 ++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 52763e48..996d8390 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A * Tumblr * Vine * Instagram +* Magisto * SoundCloud * Mixcloud * Freesound @@ -71,44 +72,44 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A ### 1. Install via Pip: $ [sudo] pip install you-get - + Check if the installation was successful: - + $ you-get -V ### 2. Install from Git: $ git clone git://github.com/soimort/you-get.git - + Use the raw script without installation: - + $ cd you-get/ $ ./you-get -V - + To install the package into the system path, execute: - + $ make install - + Check if the installation was successful: - + $ you-get -V ### 3. Direct download (from ): - + $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master $ unzip you-get.zip - + Use the raw script without installation: - + $ cd soimort-you-get-*/ $ ./you-get -V - + To install the package into the system path, execute: - + $ make install - + Check if the installation was successful: - + $ you-get -V ### 4. Install from your distro's repo: @@ -161,11 +162,11 @@ For a complete list of all available options, see: $ you-get --help Usage: you-get [OPTION]... [URL]... - + Startup options: -V | --version Display the version and exit. -h | --help Print this help and exit. - + Download options (use with URLs): -f | --force Force overwriting existed files. -i | --info Display the information of videos without downloading. diff --git a/README.txt b/README.txt index 5e259da9..a2f17c2d 100644 --- a/README.txt +++ b/README.txt @@ -31,6 +31,7 @@ Supported Sites (As of Now) * Tumblr http://www.tumblr.com * Vine http://vine.co * Instagram http://instagram.com +* Magisto http://www.magisto.com * SoundCloud http://soundcloud.com * Mixcloud http://www.mixcloud.com * Freesound http://www.freesound.org @@ -76,44 +77,44 @@ Installation #) Install via Pip:: $ [sudo] pip install you-get - + Check if the installation was successful:: - + $ you-get -V #) Install from Git:: $ git clone git://github.com/soimort/you-get.git - + Use the raw script without installation:: - + $ cd you-get/ $ ./you-get -V - + To install the package into the system path, execute:: - + $ make install - + Check if the installation was successful:: - + $ you-get -V #) Direct download:: - + $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master $ unzip you-get.zip - + Use the raw script without installation:: - + $ cd soimort-you-get-*/ $ ./you-get -V - + To install the package into the system path, execute:: - + $ make install - + Check if the installation was successful:: - + $ you-get -V #) Install from your distro's repo: @@ -169,11 +170,11 @@ For a complete list of all available options, see:: $ you-get --help Usage: you-get [OPTION]... [URL]... - + Startup options: -V | --version Display the version and exit. -h | --help Print this help and exit. - + Download options (use with URLs): -f | --force Force overwriting existed files. -i | --info Display the information of videos without downloading. From 0ae948d7572e3df947605d7bfbc6fe65a5f9c3f5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 16 Feb 2014 01:10:13 +0100 Subject: [PATCH 095/147] YouTube: support embed URL patterns --- src/you_get/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 18cdd467..4a61019a 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -122,6 +122,7 @@ def youtube_download(url, output_dir='.', merge=True, info_only=False): """ id = match1(url, r'youtu.be/([^/]+)') or \ + match1(url, r'youtube.com/embed/([^/]+)') or \ parse_query_param(url, 'v') or \ parse_query_param(parse_query_param(url, 'u'), 'v') if id is None: From 39adc8f563d4e885be7ce5db6fe766ec5f5112f8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 18 Feb 2014 02:04:15 +0100 Subject: [PATCH 096/147] Sina: key algorithm #298 --- src/you_get/extractor/sina.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/you_get/extractor/sina.py b/src/you_get/extractor/sina.py index 68b73229..8ab4931c 100644 --- a/src/you_get/extractor/sina.py +++ b/src/you_get/extractor/sina.py @@ -4,9 +4,19 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] from ..common import * -def video_info(id): - xml = get_content('http://interface.bilibili.tv/playurl?vid=%s' % id, headers=fake_headers, decoded=True) - #xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, headers=fake_headers, decoded=True) +from hashlib import md5 +from random import randint +from time import time + +def get_k(vid, rand): + t = str(int('{0:b}'.format(int(time()))[:-6], 2)) + return md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()[:16] + t + +def video_info(vid): + rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000)) + url = 'http://v.iask.com/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, get_k(vid, rand)) + xml = get_content(url, headers=fake_headers, decoded=True) + urls = re.findall(r'(?:)?', xml) name = match1(xml, r'(?:)?') vstr = match1(xml, r'(?:)?') @@ -16,7 +26,7 @@ def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only= """Downloads a Sina video by its unique vid. http://video.sina.com.cn/ """ - + urls, name, vstr = video_info(vid) title = title or name assert title @@ -24,7 +34,7 @@ def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only= for url in urls: _, _, temp = url_info(url) size += temp - + print_info(site_info, title, 'flv', size) if not info_only: download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) @@ -33,10 +43,10 @@ def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_onl """Downloads a Sina video by its unique vkey. http://video.sina.com/ """ - + url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey type, ext, size = url_info(url) - + print_info(site_info, title, 'flv', size) if not info_only: download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) @@ -44,7 +54,7 @@ def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_onl def sina_download(url, output_dir='.', merge=True, info_only=False): """Downloads Sina videos by URL. """ - + vid = match1(url, r'vid=(\d+)') if vid is None: video_page = get_content(url) @@ -52,7 +62,7 @@ def sina_download(url, output_dir='.', merge=True, info_only=False): if hd_vid == '0': vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|') vid = vids[-1] - + if vid: title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) From a31c8882fc59ae6fed7931676e1ca3801fa58f5d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 18 Feb 2014 19:47:26 +0100 Subject: [PATCH 097/147] add support: Vkontakte --- src/you_get/extractor/__init__.py | 1 + src/you_get/extractor/__main__.py | 1 + src/you_get/extractor/vk.py | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 src/you_get/extractor/vk.py diff --git a/src/you_get/extractor/__init__.py b/src/you_get/extractor/__init__.py index 5f084d4d..018cf072 100644 --- a/src/you_get/extractor/__init__.py +++ b/src/you_get/extractor/__init__.py @@ -37,6 +37,7 @@ from .tumblr import * from .vid48 import * from .vimeo import * from .vine import * +from .vk import * from .w56 import * from .xiami import * from .yinyuetai import * diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index 744a0646..0cb5fe93 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -58,6 +58,7 @@ def url_to_module(url): 'vid48': vid48, 'vimeo': vimeo, 'vine': vine, + 'vk': vk, 'xiami': xiami, 'yinyuetai': yinyuetai, 'youku': youku, diff --git a/src/you_get/extractor/vk.py b/src/you_get/extractor/vk.py new file mode 100644 index 00000000..6bb8b39a --- /dev/null +++ b/src/you_get/extractor/vk.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +__all__ = ['vk_download'] + +from ..common import * + +def vk_download(url, output_dir='.', merge=True, info_only=False): + video_page = get_content(url) + title = unescape_html(r1(r'"title":"([^"]+)"', video_page)) + info = dict(re.findall(r'\\"url(\d+)\\":\\"([^"]+)\\"', video_page)) + for quality in ['1080', '720', '480', '360', '240']: + if quality in info: + url = re.sub(r'\\\\\\/', r'/', info[quality]) + break + assert url + + type, ext, size = url_info(url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge) + +site_info = "VK.com" +download = vk_download +download_playlist = playlist_not_supported('vk') From 7fae296a3e54aad1e254dda31710fa2f10e9a6c6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 18 Feb 2014 19:50:32 +0100 Subject: [PATCH 098/147] update README: VK support --- README.md | 1 + README.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 996d8390..5aac180a 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A * Baidu Wangpan (百度网盘) * SongTaste * Alive.in.th +* VK ## Dependencies diff --git a/README.txt b/README.txt index a2f17c2d..c4275cad 100644 --- a/README.txt +++ b/README.txt @@ -63,6 +63,7 @@ Supported Sites (As of Now) * Baidu Wangpan (百度网盘) http://pan.baidu.com * SongTaste http://www.songtaste.com * Alive.in.th http://alive.in.th +* VK http://vk.com Dependencies ------------ From fbcf3ab54ec63797a208b995ac638ec177a4cdc6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 19 Feb 2014 04:46:02 +0100 Subject: [PATCH 099/147] YouTube: ignore 3D video profiles (itag=82,83,84,85,100,101,102) --- src/you_get/extractor/youtube.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 4a61019a..b24a1d9b 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -8,24 +8,24 @@ from ..common import * # taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 2/14/2014. yt_codecs = [ {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, - {'itag': 85, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '3-4', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + #{'itag': 85, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '3-4', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, - {'itag': 102, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, + #{'itag': 102, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, - {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, + #{'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'}, {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, - {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, - {'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, + #{'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'}, + #{'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, {'itag': 43, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'}, {'itag': 34, 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'}, - {'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, + #{'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, {'itag': 18, 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, {'itag': 6, 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'}, - {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, + #{'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'}, {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''}, {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'}, {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.175', 'audio_encoding': 'AAC', 'audio_bitrate': '36'}, From deb4e6af0d38228c0a02bd0d2d26f80618c076c7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 21 Feb 2014 07:47:12 +0100 Subject: [PATCH 100/147] version 0.3.28 --- CHANGELOG.txt | 9 +++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index f43f1300..be69a8d9 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,15 @@ Changelog ========= +0.3.28 +------ + +*Date: 2014-02-21* + +* New site support: + - Magisto.com + - VK.com + 0.3.27 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 73a9663e..ea83578a 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.27' -__date__ = '2014-02-14' +__version__ = '0.3.28' +__date__ = '2014-02-21' From 9d31c599afbbbaebfbf96a81009c5e17898ba5be Mon Sep 17 00:00:00 2001 From: Eskibear Date: Sun, 23 Feb 2014 00:22:36 +0800 Subject: [PATCH 101/147] fix bug: when no lyrics available, downloading procedure throws an exception and exits. --- src/you_get/extractor/xiami.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/xiami.py b/src/you_get/extractor/xiami.py index fccc584b..5d4fec76 100644 --- a/src/you_get/extractor/xiami.py +++ b/src/you_get/extractor/xiami.py @@ -50,7 +50,10 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False): album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue + try: + lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue + except: + pass type, ext, size = url_info(url, faker = True) if not ext: ext = 'mp3' From 5c05cd666b65f40e32037c9a86a545bc7d592e1c Mon Sep 17 00:00:00 2001 From: Eskibear Date: Sun, 23 Feb 2014 00:49:43 +0800 Subject: [PATCH 102/147] continue to fix lrc missing bug in different downloading mode, namely album mode, collection mode. --- src/you_get/extractor/xiami.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractor/xiami.py b/src/you_get/extractor/xiami.py index 5d4fec76..51b190db 100644 --- a/src/you_get/extractor/xiami.py +++ b/src/you_get/extractor/xiami.py @@ -81,7 +81,10 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue + try: + lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue + except: + pass type, ext, size = url_info(url, faker = True) if not ext: ext = 'mp3' @@ -109,7 +112,10 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False) for i in tracks: song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue + try: + lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue + except: + pass if not pic_exist: pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue type, ext, size = url_info(url, faker = True) From 55fc2a6bd4bd8bde959dfcd75b89c6d93b17a0ff Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 22 Feb 2014 18:43:16 +0100 Subject: [PATCH 103/147] remove all Sohu test cases --- tests/test.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test.py b/tests/test.py index 1b5884c6..0d51f86e 100644 --- a/tests/test.py +++ b/tests/test.py @@ -28,13 +28,6 @@ class YouGetTests(unittest.TestCase): "http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", ]) - def test_sohu(self): - test_urls([ - "http://tv.sohu.com/20120522/n343785589.shtml", - "http://tv.sohu.com/20130103/n362246415.shtml", - "http://tv.sohu.com/20130103/n362251239.shtml" - ]) - def test_ted(self): test_urls([ "http://www.ted.com/talks/jennifer_lin_improvs_piano_magic.html", From 75b6b57fca25856ffde9b7d486b37ac72ed8f1cd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 23 Feb 2014 19:24:18 +0100 Subject: [PATCH 104/147] Vine: fixed --- src/you_get/extractor/vine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/vine.py b/src/you_get/extractor/vine.py index cd3ff63f..35dec2ea 100644 --- a/src/you_get/extractor/vine.py +++ b/src/you_get/extractor/vine.py @@ -10,7 +10,7 @@ def vine_download(url, output_dir='.', merge=True, info_only=False): title1 = r1(r' Date: Fri, 28 Feb 2014 17:07:39 +0100 Subject: [PATCH 105/147] YouTube: fix #308 --- src/you_get/extractor/youtube.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index b24a1d9b..f8a702b7 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -95,12 +95,16 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only break url = download_stream['url'][0] - if 'sig' in download_stream: - sig = download_stream['sig'][0] - else: - js = get_content(html5player) - sig = decipher(js, download_stream['s'][0]) - url = '%s&signature=%s' % (url, sig) + try: + if 'sig' in download_stream: + sig = download_stream['sig'][0] + url = '%s&signature=%s' % (url, sig) + else: + js = get_content(html5player) + sig = decipher(js, download_stream['s'][0]) + url = '%s&signature=%s' % (url, sig) + except NameError: + pass type, ext, size = url_info(url) From 09f3f31b4a543516a573c83ded891f47a63d6c11 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 28 Feb 2014 17:21:43 +0100 Subject: [PATCH 106/147] YouTube: fix #308 (again) --- src/you_get/extractor/youtube.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index f8a702b7..0a01a225 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -95,16 +95,13 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only break url = download_stream['url'][0] - try: - if 'sig' in download_stream: - sig = download_stream['sig'][0] - url = '%s&signature=%s' % (url, sig) - else: - js = get_content(html5player) - sig = decipher(js, download_stream['s'][0]) - url = '%s&signature=%s' % (url, sig) - except NameError: - pass + if 'sig' in download_stream: + sig = download_stream['sig'][0] + url = '%s&signature=%s' % (url, sig) + elif 's' in download_stream: + js = get_content(html5player) + sig = decipher(js, download_stream['s'][0]) + url = '%s&signature=%s' % (url, sig) type, ext, size = url_info(url) From 882ee6ace7abefd38fbb331eb6b445304282407f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 28 Feb 2014 19:20:02 +0100 Subject: [PATCH 107/147] version 0.3.28.1 --- CHANGELOG.txt | 7 +++++++ src/you_get/version.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index be69a8d9..dc72dccb 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,13 @@ Changelog ========= +0.3.28.1 +-------- + +*Date: 2014-02-28* + +* Bug fix release + 0.3.28 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index ea83578a..9f763951 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.28' -__date__ = '2014-02-21' +__version__ = '0.3.28.1' +__date__ = '2014-02-28' From 3f21a0672d1e90da49a96cf9cc73056521038d9e Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Fri, 7 Mar 2014 21:08:38 +0800 Subject: [PATCH 108/147] avoid zombie git process --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index a440b580..3f3a29eb 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -787,7 +787,7 @@ def get_version(): try: import subprocess real_dir = os.path.dirname(os.path.realpath(__file__)) - git_hash = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], cwd=real_dir, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout.read().decode('utf-8').strip() + git_hash = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'], cwd=real_dir, stderr=subprocess.DEVNULL).decode('utf-8').strip() assert git_hash return '%s-%s' % (__version__, git_hash) except: From ecb7e84e6b47d0ad9205658bcc8c55fe049747aa Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 8 Mar 2014 19:49:51 +0100 Subject: [PATCH 109/147] Bilibili: fix #312 --- src/you_get/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index df7640bc..d94a11e8 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -101,7 +101,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): if not info_only: print('Downloading %s ...' % (title + '.cmt.xml')) xml = get_srt_xml(id) - with open(os.path.join(output_dir, title + '.cmt.xml'), 'w') as x: + with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: x.write(xml) site_info = "bilibili.tv" From 95cd79524119b0de92c86eb86cc9540eb2712ed5 Mon Sep 17 00:00:00 2001 From: HU Pili Date: Fri, 28 Mar 2014 12:49:34 +0800 Subject: [PATCH 110/147] Support load cookies.txt --- src/you_get/common.py | 149 +++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 3f3a29eb..0ac917ad 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -54,16 +54,16 @@ def r1_of(patterns, text): def match1(text, *patterns): """Scans through a string for substrings matched some patterns (first-subgroups only). - + Args: text: A string to be scanned. patterns: Arbitrary number of regex patterns. - + Returns: When only one pattern is given, returns a string (None if no match found). When more than one pattern are given, returns a list of strings ([] if no match found). """ - + if len(patterns) == 1: pattern = patterns[0] match = re.search(pattern, text) @@ -86,15 +86,15 @@ def launch_player(player, urls): def parse_query_param(url, param): """Parses the query string of a URL and returns the value of a parameter. - + Args: url: A URL. param: A string representing the name of the parameter. - + Returns: The value of the parameter. """ - + try: return parse.parse_qs(parse.urlparse(url).query)[param][0] except: @@ -172,7 +172,7 @@ def get_response(url, faker = False): response = request.urlopen(request.Request(url, headers = fake_headers), None) else: response = request.urlopen(url) - + data = response.read() if response.info().get('Content-Encoding') == 'gzip': data = ungzip(data) @@ -198,26 +198,30 @@ def get_decoded_html(url, faker = False): def get_content(url, headers={}, decoded=True): """Gets the content of a URL via sending a HTTP GET request. - + Args: url: A URL. headers: Request headers used by the client. decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type. - + Returns: The content as a string. """ - - response = request.urlopen(request.Request(url, headers=headers)) + + req = request.Request(url, headers=headers) + if cookies_txt: + cookies_txt.add_cookie_header(req) + req.headers.update(req.unredirected_hdrs) + response = request.urlopen(req) data = response.read() - + # Handle HTTP compression for gzip and deflate (zlib) content_encoding = response.getheader('Content-Encoding') if content_encoding == 'gzip': data = ungzip(data) elif content_encoding == 'deflate': data = undeflate(data) - + # Decode the response body if decoded: charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)') @@ -225,7 +229,7 @@ def get_content(url, headers={}, decoded=True): data = data.decode(charset) else: data = data.decode('utf-8') - + return data def url_size(url, faker = False): @@ -233,7 +237,7 @@ def url_size(url, faker = False): response = request.urlopen(request.Request(url, headers = fake_headers), None) else: response = request.urlopen(url) - + size = int(response.headers['content-length']) return size @@ -245,9 +249,9 @@ def url_info(url, faker = False): response = request.urlopen(request.Request(url, headers = fake_headers), None) else: response = request.urlopen(request.Request(url)) - + headers = response.headers - + type = headers['content-type'] mapping = { 'video/3gpp': '3gp', @@ -275,12 +279,12 @@ def url_info(url, faker = False): ext = None else: ext = None - + if headers['transfer-encoding'] != 'chunked': size = int(headers['content-length']) else: size = None - + return type, ext, size def url_locations(urls, faker = False): @@ -290,13 +294,13 @@ def url_locations(urls, faker = False): response = request.urlopen(request.Request(url, headers = fake_headers), None) else: response = request.urlopen(request.Request(url)) - + locations.append(response.url) return locations def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): file_size = url_size(url, faker = faker) - + if os.path.exists(filepath): if not force and file_size == os.path.getsize(filepath): if not is_part: @@ -314,19 +318,19 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): print('Overwriting %s' % tr(os.path.basename(filepath)), '...') elif not os.path.exists(os.path.dirname(filepath)): os.mkdir(os.path.dirname(filepath)) - + temp_filepath = filepath + '.download' received = 0 if not force: open_mode = 'ab' - + if os.path.exists(temp_filepath): received += os.path.getsize(temp_filepath) if bar: bar.update_received(os.path.getsize(temp_filepath)) else: open_mode = 'wb' - + if received < file_size: if faker: headers = fake_headers @@ -336,7 +340,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): headers['Range'] = 'bytes=' + str(received) + '-' if refer: headers['Referer'] = refer - + response = request.urlopen(request.Request(url, headers = headers), None) try: range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0]) @@ -344,13 +348,13 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): range_length = end_length - range_start except: range_length = int(response.headers['content-length']) - + if file_size != received + range_length: received = 0 if bar: bar.received = 0 open_mode = 'wb' - + with open(temp_filepath, open_mode) as output: while True: buffer = response.read(1024 * 256) @@ -364,9 +368,9 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): received += len(buffer) if bar: bar.update_received(len(buffer)) - + assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath) - + if os.access(filepath, os.W_OK): os.remove(filepath) # on Windows rename could fail if destination filepath exists os.rename(temp_filepath, filepath) @@ -389,19 +393,19 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = print('Overwriting %s' % tr(os.path.basename(filepath)), '...') elif not os.path.exists(os.path.dirname(filepath)): os.mkdir(os.path.dirname(filepath)) - + temp_filepath = filepath + '.download' received = 0 if not force: open_mode = 'ab' - + if os.path.exists(temp_filepath): received += os.path.getsize(temp_filepath) if bar: bar.update_received(os.path.getsize(temp_filepath)) else: open_mode = 'wb' - + if faker: headers = fake_headers else: @@ -410,9 +414,9 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = headers['Range'] = 'bytes=' + str(received) + '-' if refer: headers['Referer'] = refer - + response = request.urlopen(request.Request(url, headers = headers), None) - + with open(temp_filepath, open_mode) as output: while True: buffer = response.read(1024 * 256) @@ -422,9 +426,9 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = received += len(buffer) if bar: bar.update_received(len(buffer)) - + assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath)) - + if os.access(filepath, os.W_OK): os.remove(filepath) # on Windows rename could fail if destination filepath exists os.rename(temp_filepath, filepath) @@ -436,7 +440,7 @@ class SimpleProgressBar: self.total_pieces = total_pieces self.current_piece = 1 self.received = 0 - + def update(self): self.displayed = True bar_size = 40 @@ -455,14 +459,14 @@ class SimpleProgressBar: bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces) sys.stdout.write('\r' + bar) sys.stdout.flush() - + def update_received(self, n): self.received += n self.update() - + def update_piece(self, n): self.current_piece = n - + def done(self): if self.displayed: print() @@ -475,20 +479,20 @@ class PiecesProgressBar: self.total_pieces = total_pieces self.current_piece = 1 self.received = 0 - + def update(self): self.displayed = True bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('?', '?' * 40, self.current_piece, self.total_pieces) sys.stdout.write('\r' + bar) sys.stdout.flush() - + def update_received(self, n): self.received += n self.update() - + def update_piece(self, n): self.current_piece = n - + def done(self): if self.displayed: print() @@ -509,7 +513,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, if dry_run: print('Real URLs:\n', urls, '\n') return - + if player: launch_player(player, urls) return @@ -522,9 +526,9 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, import sys traceback.print_exc(file = sys.stdout) pass - + title = legitimize(title) - + filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) if total_size: @@ -535,7 +539,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, bar = SimpleProgressBar(total_size, len(urls)) else: bar = PiecesProgressBar(total_size, len(urls)) - + if len(urls) == 1: url = urls[0] print('Downloading %s ...' % tr(filename)) @@ -552,7 +556,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, bar.update_piece(i + 1) url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker) bar.done() - + if not merge: print() return @@ -570,7 +574,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, else: for part in parts: os.remove(part) - + elif ext == 'mp4': try: from .processor.ffmpeg import has_ffmpeg_installed @@ -585,10 +589,10 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, else: for part in parts: os.remove(part) - + else: print("Can't merge %s files" % ext) - + print() def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False): @@ -596,15 +600,15 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer if dry_run: print('Real URLs:\n', urls, '\n') return - + if player: launch_player(player, urls) return assert ext in ('ts') - + title = legitimize(title) - + filename = '%s.%s' % (title, 'ts') filepath = os.path.join(output_dir, filename) if total_size: @@ -615,7 +619,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer bar = SimpleProgressBar(total_size, len(urls)) else: bar = PiecesProgressBar(total_size, len(urls)) - + if len(urls) == 1: parts = [] url = urls[0] @@ -624,7 +628,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer parts.append(filepath) url_save_chunked(url, filepath, bar, refer = refer, faker = faker) bar.done() - + if not merge: print() return @@ -652,7 +656,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer bar.update_piece(i + 1) url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker) bar.done() - + if not merge: print() return @@ -669,7 +673,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer print('No ffmpeg is found. Merging aborted.') else: print("Can't merge %s files" % ext) - + print() def playlist_not_supported(name): @@ -698,7 +702,7 @@ def print_info(site_info, title, type, size): type = 'video/MP2T' elif type in ['webm']: type = 'video/webm' - + if type in ['video/3gpp']: type_info = "3GPP multimedia file (%s)" % type elif type in ['video/x-flv', 'video/f4v']: @@ -725,7 +729,7 @@ def print_info(site_info, title, type, size): type_info = "MP3 (%s)" % type else: type_info = "Unknown type (%s)" % type - + print("Video Site:", site_info) print("Title: ", tr(title)) print("Type: ", type_info) @@ -777,7 +781,7 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge url = url[8:] if not url.startswith('http://'): url = 'http://' + url - + if playlist: download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only) else: @@ -804,6 +808,7 @@ def script_main(script_name, download, download_playlist = None): -f | --force Force overwriting existed files. -i | --info Display the information of videos without downloading. -u | --url Display the real URLs of videos without downloading. + -c | --cookies Load NetScape's cookies.txt file. -n | --no-merge Don't merge video parts. -o | --output-dir Set the output directory for downloaded videos. -p | --player Directly play the video with PLAYER like vlc/smplayer. @@ -813,26 +818,28 @@ def script_main(script_name, download, download_playlist = None): --sogou-proxy Run a standalone Sogou proxy server. --debug Show traceback on KeyboardInterrupt. ''' - - short_opts = 'VhfiunSo:p:x:' - opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] + + short_opts = 'Vhfiuc:nSo:p:x:' + opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts - + try: opts, args = getopt.getopt(sys.argv[1:], short_opts, opts) except getopt.GetoptError as err: log.e(err) log.e("try 'you-get --help' for more options") sys.exit(2) - + global force global dry_run global player global sogou_proxy global sogou_env - + global cookies_txt + cookies_txt = None + info_only = False playlist = False merge = True @@ -853,6 +860,10 @@ def script_main(script_name, download, download_playlist = None): info_only = True elif o in ('-u', '--url'): dry_run = True + elif o in ('-c', '--cookies'): + from http import cookiejar + cookies_txt = cookiejar.MozillaCookieJar(a) + cookies_txt.load() elif o in ('-l', '--playlist'): playlist = True elif o in ('-n', '--no-merge'): @@ -892,7 +903,7 @@ def script_main(script_name, download, download_playlist = None): else: print(help) sys.exit() - + set_http_proxy(proxy) try: From d59532b387cac74d0e9ad55f264beaafd3ff099b Mon Sep 17 00:00:00 2001 From: HU Pili Date: Sat, 29 Mar 2014 21:42:34 +0800 Subject: [PATCH 111/147] add global variable cookies_txt Test case does not enter via script_main. Without this definition test cases will fail, although the new feature works correctly. --- src/you_get/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 0ac917ad..a3a6eeed 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -18,6 +18,7 @@ force = False player = None sogou_proxy = None sogou_env = None +cookies_txt = None fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', From 36f7cf798ffda7e78d53d0f93c18a2f36700f2e3 Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Fri, 11 Apr 2014 19:42:13 +0800 Subject: [PATCH 112/147] unescape HTML entities in media titles --- src/you_get/common.py | 5 +++-- src/you_get/util/__init__.py | 1 + src/you_get/util/strings.py | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 src/you_get/util/strings.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 3f3a29eb..2215cbbc 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -11,7 +11,7 @@ import platform import threading from .version import __version__ -from .util import log, legitimize, sogou_proxy_server +from .util import log, legitimize, sogou_proxy_server, unescape dry_run = False force = False @@ -523,6 +523,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, traceback.print_exc(file = sys.stdout) pass + title = unescape(title) title = legitimize(title) filename = '%s.%s' % (title, ext) @@ -727,7 +728,7 @@ def print_info(site_info, title, type, size): type_info = "Unknown type (%s)" % type print("Video Site:", site_info) - print("Title: ", tr(title)) + print("Title: ", unescape(tr(title))) print("Type: ", type_info) print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)") print() diff --git a/src/you_get/util/__init__.py b/src/you_get/util/__init__.py index 4c43c5fa..947ea465 100644 --- a/src/you_get/util/__init__.py +++ b/src/you_get/util/__init__.py @@ -3,3 +3,4 @@ from .fs import * from .log import * from .sogou_proxy import * +from .strings import * diff --git a/src/you_get/util/strings.py b/src/you_get/util/strings.py new file mode 100644 index 00000000..4650540c --- /dev/null +++ b/src/you_get/util/strings.py @@ -0,0 +1,20 @@ +try: + # py 3.4 + from html import unescape +except ImportError: + import re + from html.entities import entitydefs + + def unescape(string): + '''HTML entity decode''' + string = re.sub(r'&#[^;]+;', _sharp2uni, string) + string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string) + return string + + def _sharp2uni(m): + '''&#...; ==> unicode''' + s = m.group(0)[2:].rstrip(';;') + if s.startswith('x'): + return chr(int('0'+s, 16)) + else: + return chr(int(s)) From 2bca2e771c1039610c9762d93e5b47e8eed7359e Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Fri, 11 Apr 2014 19:51:07 +0800 Subject: [PATCH 113/147] better unescape, also unescape in xiami lyrics filename --- src/you_get/common.py | 7 +++---- src/you_get/extractor/xiami.py | 3 ++- src/you_get/util/strings.py | 5 +++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 2215cbbc..3b2c00d1 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -11,7 +11,7 @@ import platform import threading from .version import __version__ -from .util import log, legitimize, sogou_proxy_server, unescape +from .util import log, legitimize, sogou_proxy_server, get_filename, unescape dry_run = False force = False @@ -523,8 +523,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, traceback.print_exc(file = sys.stdout) pass - title = unescape(title) - title = legitimize(title) + title = get_filename(title) filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) @@ -604,7 +603,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer assert ext in ('ts') - title = legitimize(title) + title = get_filename(title) filename = '%s.%s' % (title, 'ts') filepath = os.path.join(output_dir, filename) diff --git a/src/you_get/extractor/xiami.py b/src/you_get/extractor/xiami.py index 51b190db..143e6eb5 100644 --- a/src/you_get/extractor/xiami.py +++ b/src/you_get/extractor/xiami.py @@ -29,8 +29,9 @@ def location_dec(str): def xiami_download_lyric(lrc_url, file_name, output_dir): lrc = get_html(lrc_url, faker = True) + filename = get_filename(file_name) if len(lrc) > 0: - with open(output_dir + "/" + file_name.replace('/', '-').replace('?', '-') + '.lrc', 'w', encoding='utf-8') as x: + with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x: x.write(lrc) def xiami_download_pic(pic_url, file_name, output_dir): diff --git a/src/you_get/util/strings.py b/src/you_get/util/strings.py index 4650540c..638aba58 100644 --- a/src/you_get/util/strings.py +++ b/src/you_get/util/strings.py @@ -18,3 +18,8 @@ except ImportError: return chr(int('0'+s, 16)) else: return chr(int(s)) + +from .fs import legitimize + +def get_filename(htmlstring): + return legitimize(unescape(htmlstring)) From cd5040b61718199ad97a13d9d446fc0528872abc Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Fri, 11 Apr 2014 19:55:54 +0800 Subject: [PATCH 114/147] let's replace old unescape_html --- src/you_get/common.py | 10 ++-------- src/you_get/util/strings.py | 6 +++--- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 3b2c00d1..c6b0acd9 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -11,7 +11,7 @@ import platform import threading from .version import __version__ -from .util import log, legitimize, sogou_proxy_server, get_filename, unescape +from .util import log, sogou_proxy_server, get_filename, unescape_html dry_run = False force = False @@ -143,12 +143,6 @@ def filenameable(text): }) return text -def unescape_html(html): - from html import parser - html = parser.HTMLParser().unescape(html) - html = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), html) - return html - def ungzip(data): """Decompresses data for Content-Encoding: gzip. """ @@ -727,7 +721,7 @@ def print_info(site_info, title, type, size): type_info = "Unknown type (%s)" % type print("Video Site:", site_info) - print("Title: ", unescape(tr(title))) + print("Title: ", unescape_html(tr(title))) print("Type: ", type_info) print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)") print() diff --git a/src/you_get/util/strings.py b/src/you_get/util/strings.py index 638aba58..7e74f35e 100644 --- a/src/you_get/util/strings.py +++ b/src/you_get/util/strings.py @@ -1,11 +1,11 @@ try: # py 3.4 - from html import unescape + from html import unescape as unescape_html except ImportError: import re from html.entities import entitydefs - def unescape(string): + def unescape_html(string): '''HTML entity decode''' string = re.sub(r'&#[^;]+;', _sharp2uni, string) string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string) @@ -22,4 +22,4 @@ except ImportError: from .fs import legitimize def get_filename(htmlstring): - return legitimize(unescape(htmlstring)) + return legitimize(unescape_html(htmlstring)) From 358df6bfffbea215df8ea4d59959ec8cc5ee64c7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 13 Apr 2014 13:31:21 +0200 Subject: [PATCH 115/147] AcFun: fix #321 --- src/you_get/extractor/acfun.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index f01bb19d..08fb8617 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -12,15 +12,15 @@ from .youku import youku_download_by_id import json, re def get_srt_json(id): - url = 'http://comment.acfun.tv/%s.json' % id + url = 'http://comment.acfun.com/%s.json' % id return get_html(url) def get_srt_lock_json(id): - url = 'http://comment.acfun.tv/%s_lock.json' % id + url = 'http://comment.acfun.com/%s_lock.json' % id return get_html(url) def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): - info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid)) + info = json.loads(get_html('http://www.acfun.com/video/getVideo.aspx?id=' + vid)) sourceType = info['sourceType'] sourceId = info['sourceId'] danmakuId = info['danmakuId'] @@ -49,10 +49,10 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only pass def acfun_download(url, output_dir = '.', merge = True, info_only = False): - assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url) + assert re.match(r'http://[^\.]+.acfun.[^\.]+/v/ac(\d+)', url) html = get_html(url) - title = r1(r'

    ]*>([^<>]+)<', html) + title = r1(r'

    ([^<>]+)<', html) title = unescape_html(title) title = escape_file_path(title) assert title @@ -68,6 +68,6 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False): id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html) sina_download_by_vid(id, title, output_dir=output_dir, merge=merge, info_only=info_only) -site_info = "AcFun.tv" +site_info = "AcFun.com" download = acfun_download download_playlist = playlist_not_supported('acfun') From 1db438ac5ee99d06ce22a73921d590d10f4fb00e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 13 Apr 2014 13:39:23 +0200 Subject: [PATCH 116/147] version: 0.3.28.2 --- CHANGELOG.txt | 7 +++++++ README.md | 2 +- README.txt | 2 +- src/you_get/version.py | 4 ++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index dc72dccb..de718214 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,13 @@ Changelog ========= +0.3.28.2 +-------- + +*Date: 2014-04-13* + +* Bug fix release + 0.3.28.1 -------- diff --git a/README.md b/README.md index 5aac180a..2584420d 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A * Youku (优酷) * Tudou (土豆) * YinYueTai (音悦台) -* AcFun +* AcFun * bilibili * CNTV (中国网络电视台) * Douban (豆瓣) diff --git a/README.txt b/README.txt index c4275cad..41fa31f1 100644 --- a/README.txt +++ b/README.txt @@ -41,7 +41,7 @@ Supported Sites (As of Now) * Youku (优酷) http://www.youku.com * Tudou (土豆) http://www.tudou.com * YinYueTai (音悦台) http://www.yinyuetai.com -* AcFun http://www.acfun.tv +* AcFun http://www.acfun.com * bilibili http://www.bilibili.tv * CNTV (中国网络电视台) http://www.cntv.cn * Douban (豆瓣) http://douban.com diff --git a/src/you_get/version.py b/src/you_get/version.py index 9f763951..9ea627bd 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.28.1' -__date__ = '2014-02-28' +__version__ = '0.3.28.2' +__date__ = '2014-04-13' From 74ae901c84fd1fa100b303db16c82ace1dbbf868 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 13 Apr 2014 14:08:19 +0200 Subject: [PATCH 117/147] Magisto: include video hash in filename --- src/you_get/extractor/magisto.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/magisto.py b/src/you_get/extractor/magisto.py index d0b3c60d..77032518 100644 --- a/src/you_get/extractor/magisto.py +++ b/src/you_get/extractor/magisto.py @@ -9,7 +9,8 @@ def magisto_download(url, output_dir='.', merge=True, info_only=False): title1 = r1(r' Date: Thu, 17 Apr 2014 14:59:05 +0200 Subject: [PATCH 118/147] YouTube: fix #322 --- src/you_get/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 0a01a225..c7ef7fc5 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -74,7 +74,7 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only else: # Parse video page when video_info is not usable. video_page = get_content('http://www.youtube.com/watch?v=%s' % id) - ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+);')) + ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+});')) title = ytplayer_config['args']['title'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') From b3d10e32a91288835f3986d68ea944a699e63608 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 26 Apr 2014 15:17:45 +0200 Subject: [PATCH 119/147] add rtmpdump wrapper --- src/you_get/processor/__init__.py | 1 + src/you_get/processor/rtmpdump.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 src/you_get/processor/rtmpdump.py diff --git a/src/you_get/processor/__init__.py b/src/you_get/processor/__init__.py index 88616f31..d728385d 100644 --- a/src/you_get/processor/__init__.py +++ b/src/you_get/processor/__init__.py @@ -3,3 +3,4 @@ from .join_flv import concat_flv from .join_mp4 import concat_mp4 from .ffmpeg import * +from .rtmpdump import * diff --git a/src/you_get/processor/rtmpdump.py b/src/you_get/processor/rtmpdump.py new file mode 100644 index 00000000..c343bb85 --- /dev/null +++ b/src/you_get/processor/rtmpdump.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +import os.path +import subprocess + +def get_usable_rtmpdump(cmd): + try: + p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + return cmd + except: + return None + +RTMPDUMP = get_usable_rtmpdump('rtmpdump') + +def has_rtmpdump_installed(): + return RTMPDUMP is not None + +def download_rtmpdump_stream(url, playpath, title, ext, output_dir='.'): + filename = '%s.%s' % (title, ext) + filepath = os.path.join(output_dir, filename) + + params = [RTMPDUMP, '-r'] + params.append(url) + params.append('-y') + params.append(playpath) + params.append('-o') + params.append(filepath) + + subprocess.call(params) + return From 4ba7ce686ca9d3445f8ca4a733ffdc3d683057d3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 26 Apr 2014 15:18:28 +0200 Subject: [PATCH 120/147] add support: CBS & thePlatform --- src/you_get/extractor/__init__.py | 2 ++ src/you_get/extractor/__main__.py | 2 ++ src/you_get/extractor/cbs.py | 21 +++++++++++++++++++++ src/you_get/extractor/theplatform.py | 26 ++++++++++++++++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 src/you_get/extractor/cbs.py create mode 100644 src/you_get/extractor/theplatform.py diff --git a/src/you_get/extractor/__init__.py b/src/you_get/extractor/__init__.py index 018cf072..f128640b 100644 --- a/src/you_get/extractor/__init__.py +++ b/src/you_get/extractor/__init__.py @@ -5,6 +5,7 @@ from .alive import * from .baidu import * from .bilibili import * from .blip import * +from .cbs import * from .cntv import * from .coursera import * from .dailymotion import * @@ -32,6 +33,7 @@ from .sina import * from .sohu import * from .songtaste import * from .soundcloud import * +from .theplatform import * from .tudou import * from .tumblr import * from .vid48 import * diff --git a/src/you_get/extractor/__main__.py b/src/you_get/extractor/__main__.py index 0cb5fe93..bbe15a33 100644 --- a/src/you_get/extractor/__main__.py +++ b/src/you_get/extractor/__main__.py @@ -24,6 +24,7 @@ def url_to_module(url): 'bilibili': bilibili, 'blip': blip, 'cntv': cntv, + 'cbs': cbs, 'coursera': coursera, 'dailymotion': dailymotion, 'douban': douban, @@ -53,6 +54,7 @@ def url_to_module(url): 'songtaste':songtaste, 'soundcloud': soundcloud, 'ted': ted, + 'theplatform': theplatform, 'tudou': tudou, 'tumblr': tumblr, 'vid48': vid48, diff --git a/src/you_get/extractor/cbs.py b/src/you_get/extractor/cbs.py new file mode 100644 index 00000000..8c9d4a7b --- /dev/null +++ b/src/you_get/extractor/cbs.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +__all__ = ['cbs_download'] + +from ..common import * + +from .theplatform import theplatform_download_by_pid + +def cbs_download(url, output_dir='.', merge=True, info_only=False): + """Downloads CBS videos by URL. + """ + + html = get_content(url) + pid = match1(html, r'video\.settings\.pid\s*=\s*\'([^\']+)\'') + title = match1(html, r'video\.settings\.title\s*=\s*\"([^\"]+)\"') + + theplatform_download_by_pid(pid, title, output_dir=output_dir, merge=merge, info_only=info_only) + +site_info = "CBS.com" +download = cbs_download +download_playlist = playlist_not_supported('cbs') diff --git a/src/you_get/extractor/theplatform.py b/src/you_get/extractor/theplatform.py new file mode 100644 index 00000000..61ce9415 --- /dev/null +++ b/src/you_get/extractor/theplatform.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +from ..common import * + +def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False): + smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid + smil = get_content(smil_url) + smil_base = unescape_html(match1(smil, r' Date: Sat, 26 Apr 2014 18:15:56 +0200 Subject: [PATCH 121/147] update README: CBS support --- README.md | 1 + README.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 2584420d..ee2b5b4f 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A * Vimeo * Coursera * Blip +* CBS * Dailymotion * eHow * Facebook diff --git a/README.txt b/README.txt index 41fa31f1..bfc5b8e1 100644 --- a/README.txt +++ b/README.txt @@ -21,6 +21,7 @@ Supported Sites (As of Now) * Vimeo http://vimeo.com * Coursera https://www.coursera.org * Blip http://blip.tv +* CBS http://www.cbs.com * Dailymotion http://dailymotion.com * eHow http://www.ehow.com * Facebook http://facebook.com From 8a6b79ade0034bfbe033329df9f698307378fd03 Mon Sep 17 00:00:00 2001 From: wz520 Date: Wed, 7 May 2014 18:12:41 +0800 Subject: [PATCH 122/147] call legitimize() for bilibili's comment title if the video title contains invalid filename character, IOError(code=22) is thrown when saving *.cmt.xml. --- src/you_get/extractor/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index d94a11e8..ef2eb77c 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -99,6 +99,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): raise NotImplementedError(flashvars) if not info_only: + title = legitimize(title) print('Downloading %s ...' % (title + '.cmt.xml')) xml = get_srt_xml(id) with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: From bf70fa9fafdd9538edd8552f953b29d754812e42 Mon Sep 17 00:00:00 2001 From: liuerfire Date: Thu, 15 May 2014 22:29:13 +0800 Subject: [PATCH 123/147] letv: fix url parse error --- src/you_get/extractor/letv.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractor/letv.py b/src/you_get/extractor/letv.py index fe371519..54aa28b2 100644 --- a/src/you_get/extractor/letv.py +++ b/src/you_get/extractor/letv.py @@ -3,18 +3,37 @@ __all__ = ['letv_download'] import json +import random import xml.etree.ElementTree as ET from ..common import * +def get_timestamp(): + tn = random.random() + url = 'http://api.letv.com/time?tn={}'.format(tn) + result = get_content(url) + return json.loads(result)['stime'] + +def get_key(t): + for s in range(0, 8): + e = 1 & t + t >>= 1 + e <<= 31 + t += e + return t ^ 185025305 + def video_info(vid): - x = get_content("http://www.letv.com/v_xml/%s.xml" % vid) - xml_obj = ET.fromstring(x) + tn = get_timestamp() + key = get_key(tn) + url = 'http://api.letv.com/mms/out/video/play?id={}&platid=1&splatid=101&format=1&tkey={}&domain=http%3A%2F%2Fwww.letv.com'.format(vid, key) + r = get_content(url, decoded=False) + xml_obj = ET.fromstring(r) info = json.loads(xml_obj.find("playurl").text) title = info.get('title') urls = info.get('dispatch') - for key in urls.keys(): - url = urls[key][0] + for k in urls.keys(): + url = urls[k][0] break + url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid={}'.format(k) return url, title def letv_download_by_vid(vid, output_dir='.', merge=True, info_only=False): From 86ab2d9a8c1b28084a9ce424e10bedb1eaf34dca Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 17 May 2014 19:28:52 +0200 Subject: [PATCH 124/147] Google+: fixed --- src/you_get/extractor/google.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/you_get/extractor/google.py b/src/you_get/extractor/google.py index 3f8fcca3..c5497daa 100644 --- a/src/you_get/extractor/google.py +++ b/src/you_get/extractor/google.py @@ -43,54 +43,54 @@ fmt_level = dict( def google_download(url, output_dir = '.', merge = True, info_only = False): # Percent-encoding Unicode URL url = parse.quote(url, safe = ':/+%') - + service = url.split('/')[2].split('.')[0] - + if service == 'plus': # Google Plus - + if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url): html = get_html(url) url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html) title = r1(r'([^<\n]+)', html) else: title = None - + html = get_html(url) real_urls = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html) real_url = unicodize(sorted(real_urls, key = lambda x : fmt_level[x[0]])[0][1]) - + if title is None: post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html) post_html = get_html(post_url) - title = r1(r'<title>([^<\n]+)', post_html) - + title = r1(r'<title[^>]*>([^<\n]+)', post_html) + if title is None: response = request.urlopen(request.Request(real_url)) if response.headers['content-disposition']: filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.') title = ''.join(filename[:-1]) - + type, ext, size = url_info(real_url) if ext is None: ext = 'mp4' - + elif service in ['docs', 'drive'] : # Google Docs - + html = get_html(url) - + title = r1(r'"title":"([^"]*)"', html) or r1(r'<meta itemprop="name" content="([^"]*)"', html) if len(title.split('.')) > 1: title = ".".join(title.split('.')[:-1]) - + docid = r1(r'"docid":"([^"]*)"', html) - + request.install_opener(request.build_opener(request.HTTPCookieProcessor())) - + request.urlopen(request.Request("https://docs.google.com/uc?id=%s&export=download" % docid)) real_url ="https://docs.google.com/uc?export=download&confirm=no_antivirus&id=%s" % docid - + type, ext, size = url_info(real_url) - + print_info(site_info, title, ext, size) if not info_only: download_urls([real_url], title, ext, size, output_dir, merge = merge) From bcf5c0689350878e1bbacdd25d8d9b4c550dcbda Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Sun, 18 May 2014 00:28:57 +0200 Subject: [PATCH 125/147] Acfun: legitimize title --- src/you_get/extractor/acfun.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 08fb8617..3c9036d1 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -36,6 +36,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only raise NotImplementedError(t) if not info_only: + title = legitimize(title) try: print('Downloading %s ...' % (title + '.cmt.json')) cmt = get_srt_json(danmakuId) From 91f1e8d2dfe15789ea8688c1b3a4aa89c265786f Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Sun, 18 May 2014 01:00:45 +0200 Subject: [PATCH 126/147] version 0.3.28.3 --- CHANGELOG.txt | 8 ++++++++ src/you_get/version.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index de718214..f2943685 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,14 @@ Changelog ========= +0.3.28.3 +-------- + +*Date: 2014-05-18* + +* New site support: + - CBS.com + 0.3.28.2 -------- diff --git a/src/you_get/version.py b/src/you_get/version.py index 9ea627bd..590f3850 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.28.2' -__date__ = '2014-04-13' +__version__ = '0.3.28.3' +__date__ = '2014-05-18' From 2cbdb199d6a8856f4ea915394c00e12b9ade709d Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Tue, 20 May 2014 03:35:29 +0200 Subject: [PATCH 127/147] Travis: add Python 3.4 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index c2a812c1..5ac5b86a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,4 +3,5 @@ language: python python: - "3.2" - "3.3" + - "3.4" script: make test From 6dedff45f99e650d2ebd105471a16808f76be2c6 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 21 May 2014 00:20:35 +0200 Subject: [PATCH 128/147] Youku: fix #319 --- src/you_get/extractor/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/youku.py b/src/you_get/extractor/youku.py index 693d6ca4..35fb7e75 100644 --- a/src/you_get/extractor/youku.py +++ b/src/you_get/extractor/youku.py @@ -18,7 +18,7 @@ def trim_title(title): return title def find_video_id_from_url(url): - patterns = [r'^http://v.youku.com/v_show/id_([\w=]+).html', + patterns = [r'^http://v.youku.com/v_show/id_([\w=]+).htm[l]?', r'^http://player.youku.com/player.php/sid/([\w=]+)/v.swf', r'^loader\.swf\?VideoIDS=([\w=]+)', r'^([\w=]+)$'] From 0bd2cab7e94ff59697661c4a605ed2ffa479b07f Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 21 May 2014 01:41:58 +0200 Subject: [PATCH 129/147] update README to reflect the merge of #317 --- README.md | 1 + README.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index ee2b5b4f..a59ac572 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,7 @@ For a complete list of all available options, see: -i | --info Display the information of videos without downloading. -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. + -c | --cookies Load NetScape's cookies.txt file. -o | --output-dir <PATH> Set the output directory for downloaded videos. -p | --player <PLAYER [options]> Directly play the video with PLAYER like vlc/smplayer. -x | --http-proxy <HOST:PORT> Use specific HTTP proxy for downloading. diff --git a/README.txt b/README.txt index bfc5b8e1..434d0c27 100644 --- a/README.txt +++ b/README.txt @@ -182,6 +182,7 @@ For a complete list of all available options, see:: -i | --info Display the information of videos without downloading. -u | --url Display the real URLs of videos without downloading. -n | --no-merge Don't merge video parts. + -c | --cookies Load NetScape's cookies.txt file. -o | --output-dir <PATH> Set the output directory for downloaded videos. -p | --player <PLAYER [options]> Directly play the video with PLAYER like vlc/smplayer. -x | --http-proxy <HOST:PORT> Use specific HTTP proxy for downloading. From b44f83e9453c96a873e2832de06023a833aec454 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 21 May 2014 02:39:35 +0200 Subject: [PATCH 130/147] update acfun.py & bilibili.py to reflect the merge of #320 --- src/you_get/extractor/acfun.py | 2 +- src/you_get/extractor/bilibili.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 3c9036d1..a89466a3 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -36,7 +36,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only raise NotImplementedError(t) if not info_only: - title = legitimize(title) + title = get_filename(title) try: print('Downloading %s ...' % (title + '.cmt.json')) cmt = get_srt_json(danmakuId) diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index ef2eb77c..d26e72a4 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -19,7 +19,7 @@ def parse_srt_p(p): assert len(fields) == 8, fields time, mode, font_size, font_color, pub_time, pool, user_id, history = fields time = float(time) - + mode = int(mode) assert 1 <= mode <= 8 # mode 1~3: scrolling @@ -28,17 +28,17 @@ def parse_srt_p(p): # mode 6: reverse? # mode 7: position # mode 8: advanced - + pool = int(pool) assert 0 <= pool <= 2 # pool 0: normal # pool 1: srt # pool 2: special? - + font_size = int(font_size) - + font_color = '#%06x' % int(font_color) - + return pool, mode, font_size, font_color def parse_srt_xml(xml): @@ -56,7 +56,7 @@ def parse_cid_playurl(xml): def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_only = False): url = 'http://interface.bilibili.tv/playurl?cid=' + id urls = [i if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i) else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) for i in parse_cid_playurl(get_html(url, 'utf-8'))] # dirty fix for QQ - + if re.search(r'\.(flv|hlv)\b', urls[0]): type = 'flv' elif re.search(r'/flv/', urls[0]): @@ -65,12 +65,12 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_onl type = 'mp4' else: type = 'flv' - + size = 0 for url in urls: _, _, temp = url_info(url) size += temp - + print_info(site_info, title, type, size) if not info_only: download_urls(urls, title, type, total_size = None, output_dir = output_dir, merge = merge) @@ -78,11 +78,11 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_onl def bilibili_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://(www.bilibili.tv|bilibili.kankanews.com|bilibili.smgbb.cn)/video/av(\d+)', url) html = get_html(url) - + title = r1(r'<h2[^>]*>([^<>]+)</h2>', html) title = unescape_html(title) title = escape_file_path(title) - + flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars t, id = flashvars.split('=', 1) @@ -97,9 +97,9 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): tudou_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) else: raise NotImplementedError(flashvars) - + if not info_only: - title = legitimize(title) + title = get_filename(title) print('Downloading %s ...' % (title + '.cmt.xml')) xml = get_srt_xml(id) with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: From 6eced06b1165c7bb2801b617284902b51479b184 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 22 May 2014 13:56:40 +0200 Subject: [PATCH 131/147] show real URL of RTMP stream --- src/you_get/common.py | 22 ++++++++++++++++++---- src/you_get/extractor/theplatform.py | 6 +++--- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index c53c0821..8c7baaea 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -503,10 +503,10 @@ class DummyProgressBar: def done(self): pass -def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False): +def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False): assert urls if dry_run: - print('Real URLs:\n', urls, '\n') + print('Real URLs:\n%s\n' % urls) return if player: @@ -590,10 +590,10 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, print() -def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False): +def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False): assert urls if dry_run: - print('Real URLs:\n', urls, '\n') + print('Real URLs:\n%s\n' % urls) return if player: @@ -671,6 +671,20 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer print() +def download_rtmp_url(url, playpath, title, ext, total_size=0, output_dir='.', refer=None, merge=True, faker=False): + assert url + if dry_run: + print('Real URLs:\n%s\n' % [url]) + return + + if player: + launch_player(player, url) + return + + from .processor.rtmpdump import has_rtmpdump_installed, download_rtmpdump_stream + assert has_rtmpdump_installed(), "RTMPDump not installed." + download_rtmpdump_stream(url, playpath, title, ext, output_dir) + def playlist_not_supported(name): def f(*args, **kwargs): raise NotImplementedError('Playlist is not supported for ' + name) diff --git a/src/you_get/extractor/theplatform.py b/src/you_get/extractor/theplatform.py index 61ce9415..d4eb1793 100644 --- a/src/you_get/extractor/theplatform.py +++ b/src/you_get/extractor/theplatform.py @@ -17,9 +17,9 @@ def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_onl print_info(site_info, title, type, size) if not info_only: - from ..processor.rtmpdump import has_rtmpdump_installed, download_rtmpdump_stream - assert has_rtmpdump_installed(), "RTMPDump not installed." - download_rtmpdump_stream(url=smil_base, playpath=ext+':'+smil_video, title=title, ext=ext, output_dir=output_dir) + #from ..processor.rtmpdump import has_rtmpdump_installed, download_rtmpdump_stream + #assert has_rtmpdump_installed(), "RTMPDump not installed." + download_rtmp_url(url=smil_base, playpath=ext+':'+smil_video, title=title, ext=ext, output_dir=output_dir) site_info = "thePlatform.com" download = theplatform_download_by_pid From 94b99128a5c3d540f70dd44cddd8614f91208640 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 22 May 2014 14:04:22 +0200 Subject: [PATCH 132/147] show real URL of RTMP stream, fix #329 --- src/you_get/common.py | 3 ++- src/you_get/extractor/theplatform.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 8c7baaea..5822f2d4 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -674,7 +674,8 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No def download_rtmp_url(url, playpath, title, ext, total_size=0, output_dir='.', refer=None, merge=True, faker=False): assert url if dry_run: - print('Real URLs:\n%s\n' % [url]) + print('Real URL:\n%s\n' % [url]) + print('Real Playpath:\n%s\n' % [playpath]) return if player: diff --git a/src/you_get/extractor/theplatform.py b/src/you_get/extractor/theplatform.py index d4eb1793..2938c459 100644 --- a/src/you_get/extractor/theplatform.py +++ b/src/you_get/extractor/theplatform.py @@ -17,8 +17,6 @@ def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_onl print_info(site_info, title, type, size) if not info_only: - #from ..processor.rtmpdump import has_rtmpdump_installed, download_rtmpdump_stream - #assert has_rtmpdump_installed(), "RTMPDump not installed." download_rtmp_url(url=smil_base, playpath=ext+':'+smil_video, title=title, ext=ext, output_dir=output_dir) site_info = "thePlatform.com" From 6dd119436ba3d2ca57442351eb8708ddf3c575a2 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 22 May 2014 14:21:17 +0200 Subject: [PATCH 133/147] add play_rtmpdump_stream() --- src/you_get/common.py | 3 ++- src/you_get/processor/rtmpdump.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 5822f2d4..d80f39c5 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -679,7 +679,8 @@ def download_rtmp_url(url, playpath, title, ext, total_size=0, output_dir='.', r return if player: - launch_player(player, url) + from .processor.rtmpdump import play_rtmpdump_stream + play_rtmpdump_stream(player, url, playpath) return from .processor.rtmpdump import has_rtmpdump_installed, download_rtmpdump_stream diff --git a/src/you_get/processor/rtmpdump.py b/src/you_get/processor/rtmpdump.py index c343bb85..6f291979 100644 --- a/src/you_get/processor/rtmpdump.py +++ b/src/you_get/processor/rtmpdump.py @@ -29,3 +29,7 @@ def download_rtmpdump_stream(url, playpath, title, ext, output_dir='.'): subprocess.call(params) return + +def play_rtmpdump_stream(player, url, playpath): + os.system("rtmpdump -r '%s' -y '%s' -o - | %s -" % (url, playpath, player)) + return From 4edcf2349394f743c3b77643a1c1ce03ebea8d39 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 22 May 2014 14:29:18 +0200 Subject: [PATCH 134/147] update README --- README.md | 6 ++++-- README.txt | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a59ac572..bca7e505 100644 --- a/README.md +++ b/README.md @@ -66,8 +66,10 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A ## Dependencies * [Python 3](http://www.python.org/download/releases/) -* __(Optional)__ [FFmpeg](http://ffmpeg.org) - * Used for converting and joining video files. +* __(Optional)__ [FFmpeg](http://ffmpeg.org) / [Libav](http://libav.org/) + * For converting and joining video files. +* __(Optional)__ [RTMPDump](http://rtmpdump.mplayerhq.hu/) + * For processing RTMP streams. ## Installation diff --git a/README.txt b/README.txt index 434d0c27..5e0152d2 100644 --- a/README.txt +++ b/README.txt @@ -70,8 +70,10 @@ Dependencies ------------ * `Python 3 <http://www.python.org/download/releases/>`_ -* (Optional) `FFmpeg <http://ffmpeg.org>`_ - * Used for converting and joining video files. +* (Optional) `FFmpeg <http://ffmpeg.org>`_ / `Libav <http://libav.org/>`_ + * For converting and joining video files. +* (Optional) `RTMPDump <http://rtmpdump.mplayerhq.hu/>`_ + * For processing RTMP streams. Installation ------------ From bac92409e72cec5b90c0e852598b6ee3936c7f35 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 29 May 2014 01:54:58 +0200 Subject: [PATCH 135/147] QQ: fix #310, using AcFun API --- src/you_get/extractor/qq.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractor/qq.py b/src/you_get/extractor/qq.py index 3ca87a58..b91f0f95 100644 --- a/src/you_get/extractor/qq.py +++ b/src/you_get/extractor/qq.py @@ -4,14 +4,27 @@ __all__ = ['qq_download'] from ..common import * -def qq_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - url = 'http://vsrc.store.qq.com/%s.flv' % id +def qq_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False): + xml = get_html('http://www.acfun.com/getinfo?vids=%s' % id) + from xml.dom.minidom import parseString + doc = parseString(xml) + doc_root = doc.getElementsByTagName('root')[0] + doc_vl = doc_root.getElementsByTagName('vl')[0] + doc_vi = doc_vl.getElementsByTagName('vi')[0] + fn = doc_vi.getElementsByTagName('fn')[0].firstChild.data + fclip = doc_vi.getElementsByTagName('fclip')[0].firstChild.data + if int(fclip) > 0: + fn = fn[:-4] + "." + fclip + fn[-4:] + fvkey = doc_vi.getElementsByTagName('fvkey')[0].firstChild.data + doc_ul = doc_vi.getElementsByTagName('ul') + url = doc_ul[0].getElementsByTagName('url')[0].firstChild.data + url = url + fn + '?vkey=' + fvkey - _, _, size = url_info(url) + _, ext, size = url_info(url) - print_info(site_info, title, 'flv', size) + print_info(site_info, title, ext, size) if not info_only: - download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) + download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) def qq_download(url, output_dir = '.', merge = True, info_only = False): if re.match(r'http://v.qq.com/([^\?]+)\?vid', url): From 822c8394a532e4e699f9c8647caeeeebb45b77e8 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 29 May 2014 02:27:38 +0200 Subject: [PATCH 136/147] iQIYI: raise NotImplementedError --- src/you_get/extractor/acfun.py | 2 +- src/you_get/extractor/iqiyi.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index a89466a3..1e0d2a43 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -33,7 +33,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only elif sourceType == 'qq': qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) else: - raise NotImplementedError(t) + raise NotImplementedError(sourceType) if not info_only: title = get_filename(title) diff --git a/src/you_get/extractor/iqiyi.py b/src/you_get/extractor/iqiyi.py index 5c951d1d..0bfec350 100644 --- a/src/you_get/extractor/iqiyi.py +++ b/src/you_get/extractor/iqiyi.py @@ -6,20 +6,23 @@ from ..common import * def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - - videoId = r1(r'data-player-videoid="([^"]+)"', html) - assert videoId - - info_url = 'http://cache.video.qiyi.com/v/%s' % videoId - info_xml = get_html(info_url) - + + tvid = r1(r'data-player-tvid="([^"]+)"', html) + videoid = r1(r'data-player-videoid="([^"]+)"', html) + assert tvid + assert videoid + + info_url = 'http://cache.video.qiyi.com/vj/%s/%s/' % (tvid, videoid) + info = get_html(info_url) + raise NotImplementedError('iqiyi') + from xml.dom.minidom import parseString doc = parseString(info_xml) title = doc.getElementsByTagName('title')[0].firstChild.nodeValue size = int(doc.getElementsByTagName('totalBytes')[0].firstChild.nodeValue) urls = [n.firstChild.nodeValue for n in doc.getElementsByTagName('file')] assert urls[0].endswith('.f4v'), urls[0] - + for i in range(len(urls)): temp_url = "http://data.video.qiyi.com/%s" % urls[i].split("/")[-1].split(".")[0] + ".ts" try: @@ -28,7 +31,7 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): key = r1(r'key=(.*)', e.geturl()) assert key urls[i] += "?key=%s" % key - + print_info(site_info, title, 'flv', size) if not info_only: download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) From f10123b9e93d920ac362f9d5daac359076cbf844 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 29 May 2014 02:42:57 +0200 Subject: [PATCH 137/147] Acfun & Bilibili: 'Downloading %s ...\n' --- src/you_get/extractor/acfun.py | 4 ++-- src/you_get/extractor/bilibili.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 1e0d2a43..31b4cd06 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -38,11 +38,11 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only if not info_only: title = get_filename(title) try: - print('Downloading %s ...' % (title + '.cmt.json')) + print('Downloading %s ...\n' % (title + '.cmt.json')) cmt = get_srt_json(danmakuId) with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: x.write(cmt) - print('Downloading %s ...' % (title + '.cmt_lock.json')) + print('Downloading %s ...\n' % (title + '.cmt_lock.json')) cmt = get_srt_lock_json(danmakuId) with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: x.write(cmt) diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index d26e72a4..254803ca 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -100,7 +100,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): if not info_only: title = get_filename(title) - print('Downloading %s ...' % (title + '.cmt.xml')) + print('Downloading %s ...\n' % (title + '.cmt.xml')) xml = get_srt_xml(id) with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: x.write(xml) From b70ac59a6a66cb2124e78fe58f0c0655ef33d293 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 29 May 2014 03:21:12 +0200 Subject: [PATCH 138/147] version 0.3.29 --- CHANGELOG.txt | 7 +++++++ src/you_get/version.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index f2943685..95b21973 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,13 @@ Changelog ========= +0.3.29 +------ + +*Date: 2014-05-29* + +* Bug fix release + 0.3.28.3 -------- diff --git a/src/you_get/version.py b/src/you_get/version.py index 590f3850..c5b74ca0 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.28.3' -__date__ = '2014-05-18' +__version__ = '0.3.29' +__date__ = '2014-05-29' From c058e140597c02cb481e1f36306cd1a6a17ef8cc Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Tue, 10 Jun 2014 02:34:16 +0200 Subject: [PATCH 139/147] TED: fixed --- src/you_get/extractor/ted.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/you_get/extractor/ted.py b/src/you_get/extractor/ted.py index 23a7054c..0c2d2c83 100644 --- a/src/you_get/extractor/ted.py +++ b/src/you_get/extractor/ted.py @@ -3,16 +3,16 @@ __all__ = ['ted_download'] from ..common import * +import json -def ted_download(url, output_dir = '.', merge = True, info_only = False): - page = get_html(url).split("\n") - for line in page: - if line.find("<title>") > -1: - title = line.replace("<title>", "").replace("", "").replace("\t", "") - title = title[:title.find(' | ')] - if line.find("no-flash-video-download") > -1: - url = line.replace(' Date: Wed, 18 Jun 2014 00:59:05 +0200 Subject: [PATCH 140/147] Vine: add video id into title --- src/you_get/extractor/instagram.py | 8 ++++---- src/you_get/extractor/vine.py | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractor/instagram.py b/src/you_get/extractor/instagram.py index 6071dfd0..0605a6c3 100644 --- a/src/you_get/extractor/instagram.py +++ b/src/you_get/extractor/instagram.py @@ -6,13 +6,13 @@ from ..common import * def instagram_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - - id = r1(r'instagram.com/p/([^/]+)/', html) + + vid = r1(r'instagram.com/p/([^/]+)/', html) description = r1(r' Date: Wed, 18 Jun 2014 01:14:11 +0200 Subject: [PATCH 141/147] Bilibili: fix #341 --- README.md | 2 +- README.txt | 2 +- src/you_get/extractor/bilibili.py | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index bca7e505..0509b98d 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ __中文说明__已移至[wiki](https://github.com/soimort/you-get/wiki/%E4%B8%A * Tudou (土豆) * YinYueTai (音悦台) * AcFun -* bilibili +* bilibili * CNTV (中国网络电视台) * Douban (豆瓣) * ifeng (凤凰视频) diff --git a/README.txt b/README.txt index 5e0152d2..b2195ae2 100644 --- a/README.txt +++ b/README.txt @@ -43,7 +43,7 @@ Supported Sites (As of Now) * Tudou (土豆) http://www.tudou.com * YinYueTai (音悦台) http://www.yinyuetai.com * AcFun http://www.acfun.com -* bilibili http://www.bilibili.tv +* bilibili http://www.bilibili.com * CNTV (中国网络电视台) http://www.cntv.cn * Douban (豆瓣) http://douban.com * ifeng (凤凰视频) http://v.ifeng.com diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index 254803ca..b10ae7da 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -11,7 +11,7 @@ from .youku import youku_download_by_id import re def get_srt_xml(id): - url = 'http://comment.bilibili.tv/%s.xml' % id + url = 'http://comment.bilibili.com/%s.xml' % id return get_html(url) def parse_srt_p(p): @@ -54,7 +54,7 @@ def parse_cid_playurl(xml): return urls def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_only = False): - url = 'http://interface.bilibili.tv/playurl?cid=' + id + url = 'http://interface.bilibili.com/playurl?cid=' + id urls = [i if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i) else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) for i in parse_cid_playurl(get_html(url, 'utf-8'))] # dirty fix for QQ if re.search(r'\.(flv|hlv)\b', urls[0]): @@ -76,14 +76,13 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_onl download_urls(urls, title, type, total_size = None, output_dir = output_dir, merge = merge) def bilibili_download(url, output_dir = '.', merge = True, info_only = False): - assert re.match(r'http://(www.bilibili.tv|bilibili.kankanews.com|bilibili.smgbb.cn)/video/av(\d+)', url) html = get_html(url) title = r1(r']*>([^<>]+)

    ', html) title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars t, id = flashvars.split('=', 1) id = id.split('&')[0] @@ -105,6 +104,6 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x: x.write(xml) -site_info = "bilibili.tv" +site_info = "bilibili.com" download = bilibili_download download_playlist = playlist_not_supported('bilibili') From e4135ba217bdd6d17496db044459a1c425e41135 Mon Sep 17 00:00:00 2001 From: np1 Date: Thu, 19 Jun 2014 23:40:33 +0100 Subject: [PATCH 142/147] Allow for dollar symbol in js function name --- src/you_get/extractor/youtube.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index c7ef7fc5..cdf3e512 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -35,7 +35,7 @@ yt_codecs = [ def decipher(js, s): def tr_js(code): code = re.sub(r'function', r'def', code) - code = re.sub(r'\$', '_', code) + code = re.sub(r'\$', '_dollar', code) code = re.sub(r'\{', r':\n\t', code) code = re.sub(r'\}', r'\n', code) code = re.sub(r'var\s+', r'', code) @@ -46,17 +46,17 @@ def decipher(js, s): code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code) return code - f1 = match1(js, r'\w+\.sig\|\|(\w+)\(\w+\.\w+\)') - f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1) + f1 = match1(js, r'\w+\.sig\|\|([$\w]+)\(\w+\.\w+\)') + f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % re.escape(f1)) code = tr_js(f1def) f2 = match1(f1def, r'([$\w]+)\(\w+,\d+\)') if f2 is not None: f2e = re.escape(f2) f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2e) - f2 = re.sub(r'\$', r'_', f2) + f2 = re.sub(r'\$', '_dollar', f2) code = code + 'global %s\n' % f2 + tr_js(f2def) - code = code + 'sig=%s(s)' % f1 + code = code + 'sig=%s(s)' % re.sub(r'\$', '_dollar', f1) exec(code, globals(), locals()) return locals()['sig'] From be4799fc8c2cc0499a1403f65ac7a743423218f4 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 22 Jun 2014 13:44:51 +0200 Subject: [PATCH 143/147] Baidu Music: fix #342 for overseas IP --- src/you_get/extractor/baidu.py | 58 ++++++++++++++-------------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/src/you_get/extractor/baidu.py b/src/you_get/extractor/baidu.py index b93b0333..c671fa74 100755 --- a/src/you_get/extractor/baidu.py +++ b/src/you_get/extractor/baidu.py @@ -12,29 +12,11 @@ def baidu_get_song_data(sid): data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data'] if data['xcode'] != '': - # inside china mainland + # inside china mainland return data['songList'][0] else: - # outside china mainland - html = get_html("http://music.baidu.com/song/%s" % sid) - - # baidu pan link - sourceLink = r1(r'"link-src-info"> Date: Tue, 24 Jun 2014 02:30:11 +0200 Subject: [PATCH 144/147] update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0a5d13ab..1d987ed9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _*/ +*.bak *.download *.cmt.* *.3gp From 72be4176f9abcc95ff61f2780d0d6dc0055f0027 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 24 Jun 2014 02:31:49 +0200 Subject: [PATCH 145/147] bump version number: 0.3.30dev --- src/you_get/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index c5b74ca0..b60d6e85 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.29' -__date__ = '2014-05-29' +__version__ = '0.3.30dev' +__date__ = '2014-06-24' From 1411c8986e32d5d555d5af9e5727e0d34a5b8b1a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 24 Jun 2014 03:59:47 +0200 Subject: [PATCH 146/147] Youku: fix #331, refactoring --- src/you_get/common.py | 121 +++++++++++++- src/you_get/extractor/acfun.py | 4 +- src/you_get/extractor/bilibili.py | 4 +- src/you_get/extractor/miomio.py | 8 +- src/you_get/extractor/tudou.py | 22 +-- src/you_get/extractor/youku.py | 260 +++++++----------------------- 6 files changed, 194 insertions(+), 225 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index d80f39c5..dfccd436 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -16,6 +16,7 @@ from .util import log, sogou_proxy_server, get_filename, unescape_html dry_run = False force = False player = None +extractor_proxy = None sogou_proxy = None sogou_env = None cookies_txt = None @@ -824,14 +825,15 @@ def script_main(script_name, download, download_playlist = None): -o | --output-dir Set the output directory for downloaded videos. -p | --player Directly play the video with PLAYER like vlc/smplayer. -x | --http-proxy Use specific HTTP proxy for downloading. + -y | --extractor-proxy Use specific HTTP proxy for extracting stream data. --no-proxy Don't use any proxy. (ignore $http_proxy) -S | --sogou Use a Sogou proxy server for downloading. --sogou-proxy Run a standalone Sogou proxy server. --debug Show traceback on KeyboardInterrupt. ''' - short_opts = 'Vhfiuc:nSo:p:x:' - opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] + short_opts = 'Vhfiuc:nSo:p:x:y:' + opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'sogou-proxy=', 'sogou-env='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -846,6 +848,7 @@ def script_main(script_name, download, download_playlist = None): global force global dry_run global player + global extractor_proxy global sogou_proxy global sogou_env global cookies_txt @@ -856,6 +859,7 @@ def script_main(script_name, download, download_playlist = None): merge = True output_dir = '.' proxy = None + extractor_proxy = None traceback = False for o, a in opts: if o in ('-V', '--version'): @@ -889,6 +893,8 @@ def script_main(script_name, download, download_playlist = None): player = a elif o in ('-x', '--http-proxy'): proxy = a + elif o in ('-y', '--extractor-proxy'): + extractor_proxy = a elif o in ('-S', '--sogou'): sogou_proxy = ("0.0.0.0", 0) elif o in ('--sogou-proxy',): @@ -924,3 +930,114 @@ def script_main(script_name, download, download_playlist = None): raise else: sys.exit(1) + + + +class VideoExtractor(): + def __init__(self, *args): + self.url = None + self.title = None + self.vid = None + self.streams = {} + self.streams_sorted = [] + + if args: + self.url = args[0] + + def download_by_url(self, url, **kwargs): + self.url = url + + self.prepare(**kwargs) + + self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] + + global extractor_proxy + if extractor_proxy: + set_proxy(parse_host(extractor_proxy)) + self.extract(**kwargs) + if extractor_proxy: + unset_proxy() + + self.download(**kwargs) + + def download_by_vid(self, vid, **kwargs): + self.vid = vid + + self.prepare(**kwargs) + + self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] + + global extractor_proxy + if extractor_proxy: + set_proxy(parse_host(extractor_proxy)) + self.extract(**kwargs) + if extractor_proxy: + unset_proxy() + + self.download(**kwargs) + + def prepare(self, **kwargs): + pass + #raise NotImplementedError() + + def extract(self, **kwargs): + pass + #raise NotImplementedError() + + def p_stream(self, stream_id): + stream = self.streams[stream_id] + print(" - id: \033[7m%s\033[0m" % stream_id) + print(" container: %s" % stream['container']) + print(" video-profile: %s" % stream['video_profile']) + print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size'])) + #print(" # download-with: \033[4myou-get --stream=%s\033[0m" % stream_id) + print() + + def p(self, stream_id=None): + print("site: %s" % self.__class__.name) + print("title: %s" % self.title) + if stream_id: + # Print the stream + print("stream:") + self.p_stream(stream_id) + + elif stream_id is None: + # Print stream with best quality + print("stream: # Best quality") + stream_id = self.streams_sorted[0]['id'] + self.p_stream(stream_id) + + elif stream_id == []: + # Print all available streams + print("streams: # Available quality and codecs") + for stream in self.streams_sorted: + self.p_stream(stream['id']) + + def download(self, **kwargs): + if 'info_only' in kwargs and kwargs['info_only']: + if 'stream_id' in kwargs and kwargs['stream_id']: + # Display the stream + stream_id = kwargs['stream_id'] + self.p(stream_id) + else: + # Display all available streams + self.p([]) + else: + if 'stream_id' in kwargs and kwargs['stream_id']: + # Download the stream + stream_id = kwargs['stream_id'] + else: + # Download stream with the best quality + stream_id = self.streams_sorted[0]['id'] + + self.p(None) + + urls = self.streams[stream_id]['src'] + if not urls: + log.e('[Failed] Cannot extract video source.') + log.e('This is most likely because the video has not been made available in your country.') + log.e('You may try to use a proxy via \'-y\' for extracting stream data.') + exit(1) + download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge']) + + self.__init__() diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 31b4cd06..00a2d21b 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -7,7 +7,7 @@ from ..common import * from .qq import qq_download_by_id from .sina import sina_download_by_vid from .tudou import tudou_download_by_iid -from .youku import youku_download_by_id +from .youku import youku_download_by_vid import json, re @@ -27,7 +27,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only if sourceType == 'sina': sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'youku': - youku_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + youku_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'tudou': tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'qq': diff --git a/src/you_get/extractor/bilibili.py b/src/you_get/extractor/bilibili.py index b10ae7da..934afdd6 100644 --- a/src/you_get/extractor/bilibili.py +++ b/src/you_get/extractor/bilibili.py @@ -6,7 +6,7 @@ from ..common import * from .sina import sina_download_by_vid from .tudou import tudou_download_by_id -from .youku import youku_download_by_id +from .youku import youku_download_by_vid import re @@ -91,7 +91,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): elif t == 'vid': sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'ykid': - youku_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + youku_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'uid': tudou_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) else: diff --git a/src/you_get/extractor/miomio.py b/src/you_get/extractor/miomio.py index bac1f64c..4c23c929 100644 --- a/src/you_get/extractor/miomio.py +++ b/src/you_get/extractor/miomio.py @@ -6,18 +6,18 @@ from ..common import * from .sina import sina_download_by_vid from .tudou import tudou_download_by_id -from .youku import youku_download_by_id +from .youku import youku_download_by_vid def miomio_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - + title = r1(r'[^<]', r'([^<>]*)'], page) - else: - title = r1_of([r'
    [^<]', r'([^-]+)—在线播放.*', r'([^<>]*)', page) - if subtitle: - subtitle = subtitle.group(1).strip() - if subtitle == title: - subtitle = None - if subtitle: - title += '-' + subtitle - return title - -def parse_playlist_title(url, page): - if re.search(r'v_playlist', url): - # if we are playing a video from play list, the meta title might be incorrect - title = re.search(r'([^<>]*)', page).group(1) - else: - title = re.search(r'> 16 - c = source.pop(index) - mixed += c - - ids = info['data'][0]['streamfileids'][stream_type].split('*')[:-1] - vid = ''.join(mixed[int(i)] for i in ids) - - sid = '%s%s%s' % (int(time() * 1000), randint(1000, 1999), randint(1000, 9999)) - - urls = [] - for s in segs[stream_type]: - no = '%02x' % int(s['no']) - url = 'http://f.youku.com/player/getFlvPath/sid/%s_%s/st/%s/fileid/%s%s%s?K=%s&ts=%s' % (sid, no, file_type, vid[:8], no.upper(), vid[10:], s['k'], s['seconds']) - urls.append((url, int(s['size']))) - return urls + return None -def file_type_of_url(url): - return str(re.search(r'/st/([^/]+)/', url).group(1)) + def parse_m3u8(m3u8): + return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8) -def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False): - # Open Sogou proxy if required - if get_sogou_proxy() is not None: - server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w')) - server_thread = threading.Thread(target=server.serve_forever) - server_thread.daemon = True - server_thread.start() - set_proxy(server.server_address) - - info = get_info(id) - - # Close Sogou proxy if required - if get_sogou_proxy() is not None: - server.shutdown() - unset_proxy() - - urls, sizes = zip(*find_video(info, stream_type)) - ext = file_type_of_url(urls[0]) - total_size = sum(sizes) - - print_info(site_info, title, ext, total_size) - if not info_only: - download_urls(urls, title, ext, total_size, output_dir, merge = merge) + def prepare(self, **kwargs): + assert self.url or self.vid + if self.url and not self.vid: + self.vid = __class__.get_vid_from_url(self.url) -def parse_playlist_videos(html): - return re.findall(r'id="A_(\w+)"', html) + meta = json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/%s' % self.vid)) + metadata0 = meta['data'][0] -def parse_playlist_pages(html): - m = re.search(r'
      .*?
    ', html, flags = re.S) - if m: - urls = re.findall(r'href="([^"]+)"', m.group()) - x1, x2, x3 = re.match(r'^(.*page_)(\d+)(_.*)$', urls[-1]).groups() - return ['http://v.youku.com%s%s%s?__rt=1&__ro=listShow' % (x1, i, x3) for i in range(2, int(x2) + 1)] - else: - return [] + self.title = metadata0['title'] -def parse_playlist(url): - html = get_html(url) - video_id = re.search(r"var\s+videoId\s*=\s*'(\d+)'", html).group(1) - show_id = re.search(r'var\s+showid\s*=\s*"(\d+)"', html).group(1) - list_url = 'http://v.youku.com/v_vpofficiallist/page_1_showid_%s_id_%s.html?__rt=1&__ro=listShow' % (show_id, video_id) - html = get_html(list_url) - ids = parse_playlist_videos(html) - for url in parse_playlist_pages(html): - ids.extend(parse_playlist_videos(get_html(url))) - return ids + for stream_type in self.stream_types: + if stream_type['id'] in metadata0['streamsizes']: + stream_id = stream_type['id'] + stream_size = int(metadata0['streamsizes'][stream_id]) + self.streams[stream_id] = {'container': stream_type['container'], 'video_profile': stream_type['video_profile'], 'size': stream_size} -def parse_vplaylist(url): - id = r1_of([r'^http://www.youku.com/playlist_show/id_(\d+)(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', - r'^http://v.youku.com/v_playlist/f(\d+)o[01]p\d+.html', - r'^http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html'], - url) - assert id, 'not valid vplaylist url: ' + url - url = 'http://www.youku.com/playlist_show/id_%s.html' % id - n = int(re.search(r'(\d+)', get_html(url)).group(1)) - return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)] + def extract(self, **kwargs): + if 'stream_id' in kwargs and kwargs['stream_id']: + # Extract the stream + stream_id = kwargs['stream_id'] + else: + # Extract stream with the best quality + stream_id = self.streams_sorted[0]['id'] -def youku_download_playlist(url, output_dir='.', merge=True, info_only=False): - """Downloads a Youku playlist. - """ - - if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url): - ids = parse_vplaylist(url) - elif re.match(r'http://v.youku.com/v_playlist/f\d+o[01]p\d+.html', url): - ids = parse_vplaylist(url) - elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url): - ids = parse_vplaylist(url) - elif re.match(r'http://www.youku.com/show_page/id_\w+.html', url): - url = find_video_id_from_show_page(url) - assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist' - ids = parse_playlist(url) - else: - ids = [] - assert ids != [] - - title = parse_playlist_title(url, get_html(url)) - title = filenameable(title) - output_dir = os.path.join(output_dir, title) - - for i, id in enumerate(ids): - print('Processing %s of %s videos...' % (i + 1, len(ids))) - try: - id, title = parse_page(youku_url(id)) - youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) - except: - continue + m3u8_url = "http://v.youku.com/player/getM3U8/vid/{vid}/type/{stream_id}/video.m3u8".format(vid=self.vid, stream_id=stream_id) + m3u8 = get_html(m3u8_url) + if not m3u8: + log.w('[Warning] This video can only be streamed within Mainland China!') + log.w('Use \'-y\' to specify a proxy server for extracting stream data.\n') -def youku_download(url, output_dir='.', merge=True, info_only=False): - """Downloads Youku videos by URL. - """ - - try: - youku_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) - except: - id, title = parse_page(url) - youku_download_by_id(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + self.streams[stream_id]['src'] = __class__.parse_m3u8(m3u8) -site_info = "Youku.com" -download = youku_download -download_playlist = youku_download_playlist +site = Youku() +download = site.download_by_url +download_playlist = playlist_not_supported('youku') + +youku_download_by_vid = site.download_by_vid +# Used by: acfun.py bilibili.py miomio.py tudou.py From 5dbeb09daacd4472f99f88c18f8c4103729dfd7a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 24 Jun 2014 12:49:04 +0200 Subject: [PATCH 147/147] cleanup after download or info_only --- src/you_get/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index dfccd436..d4a9d562 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1022,6 +1022,7 @@ class VideoExtractor(): else: # Display all available streams self.p([]) + else: if 'stream_id' in kwargs and kwargs['stream_id']: # Download the stream @@ -1040,4 +1041,4 @@ class VideoExtractor(): exit(1) download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge']) - self.__init__() + self.__init__()