mirror of https://github.com/soimort/you-get.git
synced 2025-02-03 00:33:58 +03:00

commit 6ec99038e0

.gitignore (vendored): 1 line changed
@@ -11,6 +11,7 @@ _*/
 *.3gp
 *.asf
 *.flv
+*.lrc
 *.mkv
 *.mp3
 *.mp4
CHANGELOG.txt
@@ -1,6 +1,93 @@
 Changelog
 =========
 
+0.3.21
+------
+
+*Date: 2013-08-17*
+
+* Fix issues for:
+    - YouTube
+    - YinYueTai
+    - pan.baidu.com
+
+0.3.20
+------
+
+*Date: 2013-08-16*
+
+* Add support for:
+    - eHow
+    - Khan Academy
+    - TED
+    - 5sing
+* Fix issues for:
+    - Tudou
+
+0.3.18
+------
+
+*Date: 2013-07-19*
+
+* Fix issues for:
+    - Dailymotion
+    - Youku
+    - Sina
+    - AcFun
+    - bilibili
+
+0.3.17
+------
+
+*Date: 2013-07-12*
+
+* Fix issues for:
+    - YouTube
+    - 163
+    - bilibili
+* Code cleanup.
+
+0.3.16
+------
+
+*Date: 2013-06-28*
+
+* Fix issues for:
+    - YouTube
+    - Sohu
+    - Google+ (enable HTTPS proxy)
+
+0.3.15
+------
+
+*Date: 2013-06-21*
+
+* Add support for:
+    - Instagram
+
+0.3.14
+------
+
+*Date: 2013-06-14*
+
+* Add support for:
+    - Alive.in.th
+* Remove support of:
+    - JPopsuki
+* Fix issues for:
+    - AcFun
+    - iQIYI
+
+0.3.13
+------
+
+*Date: 2013-06-07*
+
+* Add support for:
+    - Baidu Wangpan (video only)
+* Fix issue for:
+    - Google+
+
 0.3.12
 ------

@@ -86,7 +173,7 @@ Changelog
 * Add support for:
     - Douban
     - MioMio
-* Fix issue for:
+* Fix issues for:
     - Tudou
     - Vimeo
 
README.md: 18 lines changed
@@ -17,15 +17,18 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
 * Coursera <https://www.coursera.org>
 * Blip <http://blip.tv>
 * Dailymotion <http://dailymotion.com>
+* eHow <http://www.ehow.com>
 * Facebook <http://facebook.com>
 * Google+ <http://plus.google.com>
 * Google Drive <http://docs.google.com>
+* Khan Academy <http://www.khanacademy.org>
+* TED <http://www.ted.com>
 * Tumblr <http://www.tumblr.com>
 * Vine <http://vine.co>
+* Instagram <http://instagram.com>
 * SoundCloud <http://soundcloud.com>
 * Mixcloud <http://www.mixcloud.com>
 * Freesound <http://www.freesound.org>
-* JPopsuki <http://jpopsuki.tv>
 * VID48 <http://vid48.com>
 * Niconico (ニコニコ動画) <http://www.nicovideo.jp>
 * Youku (优酷) <http://www.youku.com>

@@ -47,8 +50,11 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
 * Sohu (搜狐视频) <http://tv.sohu.com>
 * 56 (56网) <http://www.56.com>
 * Xiami (虾米) <http://www.xiami.com>
-* Baidu (百度音乐) <http://music.baidu.com>
+* 5sing <http://www.5sing.com>
+* Baidu Music (百度音乐) <http://music.baidu.com>
+* Baidu Wangpan (百度网盘) <http://pan.baidu.com>
 * SongTaste <http://www.songtaste.com>
+* Alive.in.th <http://alive.in.th>
 
 ## Dependencies
 

@@ -233,15 +239,18 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/youku-lixian)
 * Coursera <https://www.coursera.org>
 * Blip <http://blip.tv>
 * Dailymotion <http://dailymotion.com>
+* eHow <http://www.ehow.com>
 * Facebook <http://facebook.com>
 * Google+ <http://plus.google.com>
 * Google Drive <http://docs.google.com>
+* Khan Academy <http://www.khanacademy.org>
+* TED <http://www.ted.com>
 * Tumblr <http://www.tumblr.com>
 * Vine <http://vine.co>
+* Instagram <http://instagram.com>
 * SoundCloud <http://soundcloud.com>
 * Mixcloud <http://www.mixcloud.com>
 * Freesound <http://www.freesound.org>
-* JPopsuki <http://jpopsuki.tv>
 * VID48 <http://vid48.com>
 * NICONICO动画 <http://www.nicovideo.jp>
 * 优酷 <http://www.youku.com>

@@ -263,8 +272,11 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/youku-lixian)
 * 搜狐视频 <http://tv.sohu.com>
 * 56网 <http://www.56.com>
 * 虾米 <http://www.xiami.com>
+* 5sing <http://www.5sing.com>
 * 百度音乐 <http://music.baidu.com>
+* 百度网盘 <http://pan.baidu.com>
 * SongTaste <http://www.songtaste.com>
+* Alive.in.th <http://alive.in.th>
 
 ## 依赖
 
README.txt: 10 lines changed
@@ -20,15 +20,18 @@ Supported Sites (As of Now)
 * Coursera https://www.coursera.org
 * Blip http://blip.tv
 * Dailymotion http://dailymotion.com
+* eHow http://www.ehow.com
 * Facebook http://facebook.com
 * Google+ http://plus.google.com
 * Google Drive http://docs.google.com
+* Khan Academy http://www.khanacademy.org
+* TED http://www.ted.com
 * Tumblr http://www.tumblr.com
 * Vine http://vine.co
+* Instagram http://instagram.com
 * SoundCloud http://soundcloud.com
 * Mixcloud http://www.mixcloud.com
 * Freesound http://www.freesound.org
-* JPopsuki http://jpopsuki.tv
 * VID48 http://vid48.com
 * Niconico (ニコニコ動画) http://www.nicovideo.jp
 * Youku (优酷) http://www.youku.com

@@ -50,8 +53,11 @@ Supported Sites (As of Now)
 * Sohu (搜狐视频) http://tv.sohu.com
 * 56 (56网) http://www.56.com
 * Xiami (虾米) http://www.xiami.com
-* Baidu (百度音乐) http://music.baidu.com
+* 5sing http://www.5sing.com
+* Baidu Music (百度音乐) http://music.baidu.com
+* Baidu Wangpan (百度网盘) http://pan.baidu.com
 * SongTaste http://www.songtaste.com
+* Alive.in.th http://alive.in.th
 
 Dependencies
 ------------
src/you_get/__init__.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 
-from .processor import *
-
-from .downloader import *
-
-from .version import *
 from .common import *
-from .__main__ import *
+from .version import *
+
+# Easy import
+#from .cli_wrapper.converter import *
+#from .cli_wrapper.player import *
+from .downloader import *
src/you_get/common.py
@@ -7,6 +7,7 @@ import os
 import re
 import sys
 from urllib import request, parse
+import platform
 
 from .version import __version__
 
|
|||||||
except:
|
except:
|
||||||
return str(s.encode('utf-8'))[2:-1]
|
return str(s.encode('utf-8'))[2:-1]
|
||||||
|
|
||||||
|
# DEPRECATED in favor of match1()
|
||||||
def r1(pattern, text):
|
def r1(pattern, text):
|
||||||
m = re.search(pattern, text)
|
m = re.search(pattern, text)
|
||||||
if m:
|
if m:
|
||||||
return m.group(1)
|
return m.group(1)
|
||||||
|
|
||||||
|
# DEPRECATED in favor of match1()
|
||||||
def r1_of(patterns, text):
|
def r1_of(patterns, text):
|
||||||
for p in patterns:
|
for p in patterns:
|
||||||
x = r1(p, text)
|
x = r1(p, text)
|
||||||
if x:
|
if x:
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
def match1(text, *patterns):
|
||||||
|
"""Scans through a string for substrings matched some patterns (first-subgroups only).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: A string to be scanned.
|
||||||
|
patterns: Arbitrary number of regex patterns.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
When only one pattern is given, returns a string (None if no match found).
|
||||||
|
When more than one pattern are given, returns a list of strings ([] if no match found).
|
||||||
|
"""
|
||||||
|
|
||||||
|
if len(patterns) == 1:
|
||||||
|
pattern = patterns[0]
|
||||||
|
match = re.search(pattern, text)
|
||||||
|
if match:
|
||||||
|
return match.group(1)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
ret = []
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, text)
|
||||||
|
if match:
|
||||||
|
ret.append(match.group(1))
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def parse_query_param(url, param):
|
||||||
|
"""Parses the query string of a URL and returns the value of a parameter.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: A URL.
|
||||||
|
param: A string representing the name of the parameter.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The value of the parameter.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return parse.parse_qs(parse.urlparse(url).query)[param][0]
|
||||||
|
|
||||||
def unicodize(text):
|
def unicodize(text):
|
||||||
return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)
|
return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)
|
||||||
|
|
||||||
|
# DEPRECATED in favor of filenameable()
|
||||||
def escape_file_path(path):
|
def escape_file_path(path):
|
||||||
path = path.replace('/', '-')
|
path = path.replace('/', '-')
|
||||||
path = path.replace('\\', '-')
|
path = path.replace('\\', '-')
|
||||||
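For orientation, a brief usage sketch of the new match1() helper; the sample string and patterns below are made up for illustration and are not part of the commit:

    from you_get.common import match1

    html = '<video vid="12345" title="demo"></video>'   # hypothetical input

    # One pattern: returns the first subgroup, or None on no match.
    match1(html, r'vid="(\d+)"')                          # -> '12345'

    # Several patterns: returns a list of first subgroups, in pattern order.
    match1(html, r'vid="(\d+)"', r'title="([^"]+)"')      # -> ['12345', 'demo']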
@@ -54,23 +98,57 @@ def escape_file_path(path):
     path = path.replace('?', '-')
     return path
 
+def filenameable(text):
+    """Converts a string to a legal filename through various OSes.
+    """
+    # All POSIX systems
+    text = text.translate({
+        0: None,
+        ord('/'): '-',
+    })
+    if platform.system() == 'Darwin': # For Mac OS
+        text = text.translate({
+            ord(':'): '-',
+        })
+    elif platform.system() == 'Windows': # For Windows
+        text = text.translate({
+            ord(':'): '-',
+            ord('*'): '-',
+            ord('?'): '-',
+            ord('\\'): '-',
+            ord('\"'): '\'',
+            ord('<'): '-',
+            ord('>'): '-',
+            ord('|'): '-',
+            ord('+'): '-',
+            ord('['): '(',
+            ord(']'): ')',
+        })
+    return text
+
 def unescape_html(html):
     from html import parser
     html = parser.HTMLParser().unescape(html)
     html = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), html)
     return html
 
-def ungzip(s):
+def ungzip(data):
+    """Decompresses data for Content-Encoding: gzip.
+    """
     from io import BytesIO
     import gzip
-    buffer = BytesIO(s)
-    f = gzip.GzipFile(fileobj = buffer)
+    buffer = BytesIO(data)
+    f = gzip.GzipFile(fileobj=buffer)
     return f.read()
 
-def undeflate(s):
+def undeflate(data):
+    """Decompresses data for Content-Encoding: deflate.
+    (the zlib compression is used.)
+    """
     import zlib
-    return zlib.decompress(s, -zlib.MAX_WBITS)
+    return zlib.decompress(data, -zlib.MAX_WBITS)
 
+# DEPRECATED in favor of get_content()
 def get_response(url, faker = False):
     if faker:
         response = request.urlopen(request.Request(url, headers = fake_headers), None)
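A quick sketch of what filenameable() does with an awkward title (the title itself is made up):

    from you_get.common import filenameable

    # On a POSIX system only NUL and '/' are rewritten:
    filenameable('AC/DC: Live?')    # -> 'AC-DC: Live?'

    # On Windows the set : * ? \ " < > | + [ ] is mapped as well,
    # so the same title would come out as 'AC-DC- Live-'.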
@@ -85,10 +163,12 @@ def get_response(url, faker = False):
     response.data = data
     return response
 
+# DEPRECATED in favor of get_content()
 def get_html(url, encoding = None, faker = False):
     content = get_response(url, faker).data
     return str(content, 'utf-8', 'ignore')
 
+# DEPRECATED in favor of get_content()
 def get_decoded_html(url, faker = False):
     response = get_response(url, faker)
     data = response.data

@@ -98,6 +178,38 @@ def get_decoded_html(url, faker = False):
     else:
         return data
 
+def get_content(url, headers={}, decoded=True):
+    """Gets the content of a URL via sending a HTTP GET request.
+
+    Args:
+        url: A URL.
+        headers: Request headers used by the client.
+        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.
+
+    Returns:
+        The content as a string.
+    """
+
+    response = request.urlopen(request.Request(url, headers=headers))
+    data = response.read()
+
+    # Handle HTTP compression for gzip and deflate (zlib)
+    content_encoding = response.getheader('Content-Encoding')
+    if content_encoding == 'gzip':
+        data = ungzip(data)
+    elif content_encoding == 'deflate':
+        data = undeflate(data)
+
+    # Decode the response body
+    if decoded:
+        charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
+        if charset is not None:
+            data = data.decode(charset)
+        else:
+            data = data.decode('utf-8')
+
+    return data
+
 def url_size(url, faker = False):
     if faker:
         response = request.urlopen(request.Request(url, headers = fake_headers), None)
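A minimal usage sketch of get_content(), the replacement for the deprecated get_* helpers; the URL and header values are placeholders:

    from you_get.common import get_content

    # Fetches a page as text: gzip/deflate bodies are decompressed
    # transparently, and the charset from Content-Type (UTF-8 by
    # default) is used for decoding.
    html = get_content('http://example.com/video/123',
                       headers={'User-Agent': 'Mozilla/5.0'})

    # decoded=False returns the decompressed bytes without decoding.
    raw = get_content('http://example.com/video/123', decoded=False)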
@@ -136,7 +248,7 @@ def url_info(url, faker = False):
     type = None
     if headers['content-disposition']:
         try:
-            filename = parse.unquote(r1(r'filename="?(.+)"?', headers['content-disposition']))
+            filename = parse.unquote(r1(r'filename="?([^"]+)"?', headers['content-disposition']))
             if len(filename.split('.')) > 1:
                 ext = filename.split('.')[-1]
             else:
|
|||||||
import sys
|
import sys
|
||||||
traceback.print_exc(file = sys.stdout)
|
traceback.print_exc(file = sys.stdout)
|
||||||
pass
|
pass
|
||||||
title = escape_file_path(title)
|
|
||||||
|
title = filenameable(title)
|
||||||
|
|
||||||
filename = '%s.%s' % (title, ext)
|
filename = '%s.%s' % (title, ext)
|
||||||
filepath = os.path.join(output_dir, filename)
|
filepath = os.path.join(output_dir, filename)
|
||||||
if total_size:
|
if total_size:
|
||||||
@@ -437,19 +551,18 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
 
         elif ext == 'mp4':
             try:
-                from .processor.join_mp4 import concat_mp4
-                concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
-                for part in parts:
-                    os.remove(part)
-            except:
                 from .processor.ffmpeg import has_ffmpeg_installed
                 if has_ffmpeg_installed():
                     from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
                     ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
-                    for part in parts:
-                        os.remove(part)
                 else:
-                    print('No ffmpeg is found. Merging aborted.')
+                    from .processor.join_mp4 import concat_mp4
+                    concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
+            except:
+                raise
+            else:
+                for part in parts:
+                    os.remove(part)
 
         else:
             print("Can't merge %s files" % ext)
@@ -463,7 +576,9 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
         return
 
     assert ext in ('ts')
-    title = escape_file_path(title)
+
+    title = filenameable(title)
+
     filename = '%s.%s' % (title, 'ts')
     filepath = os.path.join(output_dir, filename)
     if total_size:
@@ -597,9 +712,7 @@ def set_http_proxy(proxy):
     elif proxy == '': # Don't use any proxy
         proxy_support = request.ProxyHandler({})
     else: # Use proxy
-        if not proxy.startswith('http://'):
-            proxy = 'http://' + proxy
-        proxy_support = request.ProxyHandler({'http': '%s' % proxy})
+        proxy_support = request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy})
     opener = request.build_opener(proxy_support)
     request.install_opener(opener)
 
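This appears to be the change behind the "Google+ (enable HTTPS proxy)" entry in 0.3.16: a single handler now covers both schemes. A hypothetical call:

    from you_get.common import set_http_proxy

    # Routes both http:// and https:// requests through the proxy;
    # the address below is a placeholder.
    set_http_proxy('127.0.0.1:8087')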
@@ -615,8 +728,18 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge, info_only):
     else:
         download(url, output_dir = output_dir, merge = merge, info_only = info_only)
 
+def get_version():
+    try:
+        import subprocess
+        real_dir = os.path.dirname(os.path.realpath(__file__))
+        git_hash = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], cwd=real_dir, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout.read().decode('utf-8').strip()
+        assert git_hash
+        return '%s-%s' % (__version__, git_hash)
+    except:
+        return __version__
+
 def script_main(script_name, download, download_playlist = None):
-    version = 'You-Get %s, a video downloader.' % __version__
+    version = 'You-Get %s, a video downloader.' % get_version()
     help = 'Usage: %s [OPTION]... [URL]...\n' % script_name
     help += '''\nStartup options:
     -V | --version                    Display the version and exit.
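With get_version(), a development checkout reports the short hash of HEAD next to the release number; outside a git work tree the subprocess fails and the bare version is returned. Illustrative output, with the values assumed rather than taken from the commit:

    >>> from you_get.common import get_version
    >>> get_version()       # inside a git checkout
    '0.3.21-6ec9903'
    >>> get_version()       # e.g. from a tarball install, where git fails
    '0.3.21'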
src/you_get/downloader/__init__.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 from .acfun import *
+from .alive import *
 from .baidu import *
 from .bilibili import *
 from .blip import *

@@ -8,13 +9,15 @@ from .cntv import *
 from .coursera import *
 from .dailymotion import *
 from .douban import *
+from .ehow import *
 from .facebook import *
+from .fivesing import *
 from .freesound import *
 from .google import *
 from .ifeng import *
+from .instagram import *
 from .iqiyi import *
 from .joy import *
-from .jpopsuki import *
 from .ku6 import *
 from .miomio import *
 from .mixcloud import *

@@ -36,3 +39,7 @@ from .xiami import *
 from .yinyuetai import *
 from .youku import *
 from .youtube import *
+from .ted import *
+from .khan import *
+
+from .__main__ import *
src/you_get/downloader/__main__.py
@@ -1,9 +1,8 @@
 #!/usr/bin/env python
 
 __all__ = ['main', 'any_download', 'any_download_playlist']
 
-from .downloader import *
-from .common import *
+from ..downloader import *
+from ..common import *
 
 def url_to_module(url):
     site = r1(r'http://([^/]+)/', url)

@@ -20,6 +19,7 @@ def url_to_module(url):
     downloads = {
         '163': netease,
         '56': w56,
+        '5sing': fivesing,
         'acfun': acfun,
         'baidu': baidu,
         'bilibili': bilibili,

@@ -28,14 +28,16 @@ def url_to_module(url):
         'coursera': coursera,
         'dailymotion': dailymotion,
         'douban': douban,
+        'ehow': ehow,
         'facebook': facebook,
         'freesound': freesound,
         'google': google,
         'iask': sina,
         'ifeng': ifeng,
+        'in': alive,
+        'instagram': instagram,
         'iqiyi': iqiyi,
         'joy': joy,
-        'jpopsuki': jpopsuki,
         'kankanews': bilibili,
         'ku6': ku6,
         'miomio': miomio,

@@ -48,6 +50,7 @@ def url_to_module(url):
         'sohu': sohu,
         'songtaste':songtaste,
         'soundcloud': soundcloud,
+        'ted': ted,
         'tudou': tudou,
         'tumblr': tumblr,
         'vid48': vid48,

@@ -58,6 +61,7 @@ def url_to_module(url):
         'youku': youku,
         'youtu': youtube,
         'youtube': youtube,
+        'khanacademy': khan,
         #TODO
     }
     if k in downloads:
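For context, url_to_module() keys this table on a component of the URL's hostname (the code that derives the key k sits outside these hunks), so each new entry wires a hostname to its downloader module. Hypothetical traces:

    # http://www.5sing.com/...         -> key '5sing'       -> fivesing
    # http://alive.in.th/...           -> key 'in'          -> alive
    # http://www.khanacademy.org/...   -> key 'khanacademy' -> khan
    # http://www.ted.com/...           -> key 'ted'         -> ted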
src/you_get/downloader/acfun.py
@@ -5,7 +5,7 @@ __all__ = ['acfun_download']
 from ..common import *
 
 from .qq import qq_download_by_id
-from .sina import sina_download_by_id
+from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_iid
 from .youku import youku_download_by_id
 

@@ -16,11 +16,11 @@ def get_srt_json(id):
     return get_html(url)
 
 def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
-    info = json.loads(get_html('http://www.acfun.tv/api/getVideoByID.aspx?vid=' + id))
+    info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id))
     t = info['vtype']
     vid = info['vid']
     if t == 'sina':
-        sina_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        sina_download_by_vid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'youku':
         youku_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'tudou':

@@ -37,7 +37,7 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
             x.write(cmt)
 
 def acfun_download(url, output_dir = '.', merge = True, info_only = False):
-    assert re.match(r'http://www.acfun.tv/v/ac(\d+)', url)
+    assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url)
     html = get_html(url)
 
     title = r1(r'<h1 id="title-article" class="title"[^<>]*>([^<>]+)<', html)

@@ -49,7 +49,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False):
     id = r1(r"\[Video\](\d+)\[/Video\]", html) or r1(r"\[video\](\d+)\[/video\]", html)
     if not id:
         id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
-        sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
     else:
         acfun_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
src/you_get/downloader/alive.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+__all__ = ['alive_download']
+
+from ..common import *
+
+def alive_download(url, output_dir = '.', merge = True, info_only = False):
+    html = get_html(url)
+
+    title = r1(r'<meta property="og:title" content="([^"]+)"', html)
+
+    url = r1(r'file: "(http://alive[^"]+)"', html)
+    type, ext, size = url_info(url)
+
+    print_info(site_info, title, type, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Alive.in.th"
+download = alive_download
+download_playlist = playlist_not_supported('alive')
src/you_get/downloader/baidu.py
@@ -68,12 +68,25 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
             track_nr += 1
 
 def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    if re.match(r'http://music.baidu.com/album/\d+', url):
+    if re.match(r'http://pan.baidu.com', url):
+        html = get_html(url)
+
+        title = r1(r'server_filename="([^"]+)"', html)
+        if len(title.split('.')) > 1:
+            title = ".".join(title.split('.')[:-1])
+
+        real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
+        type, ext, size = url_info(real_url, faker = True)
+
+        print_info(site_info, title, ext, size)
+        if not info_only:
+            download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+    elif re.match(r'http://music.baidu.com/album/\d+', url):
         id = r1(r'http://music.baidu.com/album/(\d+)', url)
         baidu_download_album(id, output_dir, merge, info_only)
 
-    if re.match('http://music.baidu.com/song/\d+', url):
+    elif re.match('http://music.baidu.com/song/\d+', url):
         id = r1(r'http://music.baidu.com/song/(\d+)', url)
         baidu_download_song(id, output_dir, merge, info_only)
 
src/you_get/downloader/bilibili.py
@@ -4,7 +4,7 @@ __all__ = ['bilibili_download']
 
 from ..common import *
 
-from .sina import sina_download_by_id
+from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_id
 from .youku import youku_download_by_id
 

@@ -64,7 +64,7 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_only = False):
     elif re.search(r'/mp4/', urls[0]):
         type = 'mp4'
     else:
-        raise NotImplementedError(urls[0])
+        type = 'flv'
 
     size = 0
     for url in urls:

@@ -83,7 +83,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False):
     title = unescape_html(title)
     title = escape_file_path(title)
 
-    flashvars = r1_of([r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+    flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
     assert flashvars
     t, id = flashvars.split('=', 1)
     id = id.split('&')[0]
src/you_get/downloader/dailymotion.py
@@ -5,16 +5,22 @@ __all__ = ['dailymotion_download']
 from ..common import *
 
 def dailymotion_download(url, output_dir = '.', merge = True, info_only = False):
-    html = get_html(url)
-    html = parse.unquote(html).replace('\/', '/')
-
-    title = r1(r'meta property="og:title" content="([^"]+)"', html)
-    title = escape_file_path(title)
-
-    for quality in ['hd720URL', 'hqURL', 'sdURL']:
-        real_url = r1(r',\"' + quality + '\"\:\"([^\"]+?)\",', html)
+    """Downloads Dailymotion videos by URL.
+    """
+
+    id = match1(url, r'/video/([^\?]+)')
+    embed_url = 'http://www.dailymotion.com/embed/video/%s' % id
+    html = get_content(embed_url)
+
+    info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))
+
+    title = info['title']
+
+    for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
+        real_url = info[quality]
         if real_url:
             break
 
     type, ext, size = url_info(real_url)
 
     print_info(site_info, title, type, size)
src/you_get/downloader/ehow.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+__all__ = ['ehow_download']
+
+from ..common import *
+
+def ehow_download(url, output_dir = '.', merge = True, info_only = False):
+
+    assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported"
+
+    html = get_html(url)
+    contentid = r1(r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
+    vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
+    assert vid
+
+    xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)
+
+    from xml.dom.minidom import parseString
+    doc = parseString(xml)
+    tab = doc.getElementsByTagName('related')[0].firstChild
+
+    for video in tab.childNodes:
+        if re.search(contentid, video.attributes['link'].value):
+            url = video.attributes['flv'].value
+            break
+
+    title = video.attributes['title'].value
+    assert title
+
+    type, ext, size = url_info(url)
+    print_info(site_info, title, type, size)
+
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "ehow.com"
+download = ehow_download
+download_playlist = playlist_not_supported('ehow')
src/you_get/downloader/fivesing.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+
+__all__ = ['fivesing_download']
+
+from ..common import *
+
+def fivesing_download(url, output_dir=".", merge=True, info_only=False):
+    html = get_html(url)
+    title = r1(r'var SongName = "(.*)";', html)
+    url = r1(r'file: "(\S*)"', html)
+    songtype, ext, size = url_info(url)
+    print_info(site_info, title, songtype, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir)
+
+site_info = "5sing.com"
+download = fivesing_download
+download_playlist = playlist_not_supported("5sing")
src/you_get/downloader/google.py
@@ -6,6 +6,40 @@ from ..common import *
 
 import re
 
+# YouTube media encoding options, in descending quality order.
+# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
+youtube_codecs = [
+    {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
+    {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
+    {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
+    {'itag': 102, 'container': '', 'video_resolution': '', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
+    {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': '', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': '', 'audio_bitrate': ''},
+    {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
+    {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
+    {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'AVC', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
+    {'itag': 85, 'container': 'MP4', 'video_resolution': '520p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
+    {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
+    {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
+    {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
+    {'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
+    {'itag': 43, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
+    {'itag': 34, 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
+    {'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
+    {'itag': 18, 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
+    {'itag': 6, 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
+    {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
+    {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''},
+    {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
+    {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.17', 'audio_encoding': 'AAC', 'audio_bitrate': '38'},
+    {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
+]
+
+fmt_level = dict(
+    zip(
+        [str(codec['itag'])
+            for codec in
+                youtube_codecs],
+        range(len(youtube_codecs))))
+
 def google_download(url, output_dir = '.', merge = True, info_only = False):
     # Percent-encoding Unicode URL
     url = parse.quote(url, safe = ':/+%')

@@ -14,25 +48,22 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
 
     if service == 'plus': # Google Plus
 
-        if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url):
-            oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url)
-            pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url)
-
-        elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url):
-            oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url)
-            pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url)
-
-        else:
+        if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
             html = get_html(url)
-            oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html)
-            pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html)
+            url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html)
+            title = r1(r'<title>([^<\n]+)', html)
+        else:
+            title = None
 
-        url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
         html = get_html(url)
-        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
+        real_urls = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
+        real_url = unicodize(sorted(real_urls, key = lambda x : fmt_level[x[0]])[0][1])
+
+        if title is None:
+            post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html)
+            post_html = get_html(post_url)
+            title = r1(r'<title>([^<\n]+)', post_html)
 
-        title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html)
         if title is None:
             response = request.urlopen(request.Request(real_url))
             if response.headers['content-disposition']:
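fmt_level maps each itag (as a string) to its index in the quality-ordered list above, so choosing the best available stream reduces to a sort. A toy illustration with made-up URLs:

    # fmt_level == {'38': 0, '46': 1, '37': 2, ...}
    real_urls = [('43', 'http://example.com/360p.webm'),
                 ('22', 'http://example.com/720p.mp4')]    # hypothetical finds
    best = sorted(real_urls, key=lambda x: fmt_level[x[0]])[0][1]
    # -> 'http://example.com/720p.mp4', since itag 22 ranks above itag 43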
src/you_get/downloader/instagram.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+__all__ = ['instagram_download']
+
+from ..common import *
+
+def instagram_download(url, output_dir = '.', merge = True, info_only = False):
+    html = get_html(url)
+
+    id = r1(r'instagram.com/p/([^/]+)/', html)
+    description = r1(r'<meta property="og:description" content="([^"]*)"', html)
+    title = description + " [" + id + "]"
+    url = r1(r'<meta property="og:video" content="([^"]*)"', html)
+    type, ext, size = url_info(url)
+
+    print_info(site_info, title, type, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Instagram.com"
+download = instagram_download
+download_playlist = playlist_not_supported('instagram')
src/you_get/downloader/iqiyi.py
@@ -6,13 +6,8 @@ from ..common import *
 
 def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
-    #title = r1(r'title\s*:\s*"([^"]+)"', html)
-    #title = unescape_html(title).decode('utf-8')
-    #videoId = r1(r'videoId\s*:\s*"([^"]+)"', html)
-    #pid = r1(r'pid\s*:\s*"([^"]+)"', html)
-    #ptype = r1(r'ptype\s*:\s*"([^"]+)"', html)
-    #info_url = 'http://cache.video.qiyi.com/v/%s/%s/%s/' % (videoId, pid, ptype)
-    videoId = r1(r'''["']videoId["'][:=]["']([^"']+)["']''', html)
+    videoId = r1(r'data-player-videoid="([^"]+)"', html)
     assert videoId
 
     info_url = 'http://cache.video.qiyi.com/v/%s' % videoId
src/you_get/downloader/jpopsuki.py (deleted)
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['jpopsuki_download']
-
-from ..common import *
-
-def jpopsuki_download(url, output_dir = '.', merge = True, info_only = False):
-    html = get_html(url)
-
-    title = r1(r'<meta name="title" content="([^"]*)"', html)
-    if title.endswith(' - JPopsuki TV'):
-        title = title[:-14]
-
-    url = "http://jpopsuki.tv%s" % r1(r'<source src="([^"]*)"', html)
-    type, ext, size = url_info(url)
-
-    print_info(site_info, title, type, size)
-    if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
-
-site_info = "JPopsuki.tv"
-download = jpopsuki_download
-download_playlist = playlist_not_supported('jpopsuki')
src/you_get/downloader/khan.py (new executable file, 15 lines)
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+
+__all__ = ['khan_download']
+
+from ..common import *
+from .youtube import youtube_download_by_id
+
+def khan_download(url, output_dir = '.', merge = True, info_only = False):
+    page = get_html(url)
+    id = page[page.find('src="https://www.youtube.com/embed/') + len('src="https://www.youtube.com/embed/') :page.find('?enablejsapi=1&wmode=transparent&modestbranding=1&rel=0&fs=1&showinfo=0')]
+    youtube_download_by_id(id, output_dir=output_dir, merge=merge, info_only=info_only)
+
+site_info = "khanacademy.org"
+download = khan_download
+download_playlist = playlist_not_supported('khan')
src/you_get/downloader/netease.py
@@ -7,10 +7,13 @@ from ..common import *
 def netease_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_decoded_html(url)
 
-    src = r1(r'<source src="([^"]+)"', html)
-    title = r1('movieDescription=\'([^\']+)\'', html)
+    title = r1('movieDescription=\'([^\']+)\'', html) or r1('<title>(.+)</title>', html)
+    if title[0] == ' ':
+        title = title[1:]
 
-    if title:
+    src = r1(r'<source src="([^"]+)"', html) or r1(r'<source type="[^"]+" src="([^"]+)"', html)
+
+    if src:
         sd_url = r1(r'(.+)-mobile.mp4', src) + ".flv"
         _, _, sd_size = url_info(sd_url)

@@ -24,10 +27,7 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False):
         ext = 'flv'
 
     else:
-        title = r1('<title>(.+)</title>', html)
-        if title[0] == ' ':
-            title = title[1:]
-        url = r1(r'(.+)-list.m3u8', src) + ".mp4"
+        url = r1(r'["\'](.+)-list.m3u8["\']', html) + ".mp4"
         _, _, size = url_info(url)
         ext = 'mp4'
 
src/you_get/downloader/nicovideo.py
@@ -23,7 +23,7 @@ def nicovideo_download(url, output_dir = '.', merge = True, info_only = False):
         nicovideo_login(user, password)
 
     html = get_html(url) # necessary!
-    title = unicodize(r1(r'title:\s*\'(.*)\',', html))
+    title = unicodize(r1(r'<span class="videoHeaderTitle">([^<]+)</span>', html))
 
     api_html = get_html('http://www.nicovideo.jp/api/getflv?v=%s' % url.split('/')[-1])
     real_url = parse.unquote(r1(r'url=([^&]+)&', api_html))
|
@ -9,18 +9,14 @@ import urllib
|
|||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
|
def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
|
||||||
xml = get_html('http://web-play.pptv.com/webplay3-151-%s.xml' % id)
|
xml = get_html('http://web-play.pptv.com/webplay3-0-%s.xml?type=web.fpp' % id)
|
||||||
host = r1(r'<sh>([^<>]+)</sh>', xml)
|
host = r1(r'<sh>([^<>]+)</sh>', xml)
|
||||||
port = 8080
|
key = r1(r'<key expire=[^<>]+>([^<>]+)</key>', xml)
|
||||||
st = r1(r'<st>([^<>]+)</st>', xml).encode('utf-8')
|
|
||||||
key = hashlib.md5(st).hexdigest() # FIXME: incorrect key
|
|
||||||
rids = re.findall(r'rid="([^"]+)"', xml)
|
|
||||||
rid = r1(r'rid="([^"]+)"', xml)
|
rid = r1(r'rid="([^"]+)"', xml)
|
||||||
title = r1(r'nm="([^"]+)"', xml)
|
title = r1(r'nm="([^"]+)"', xml)
|
||||||
pieces = re.findall('<sgm no="(\d+)".*fs="(\d+)"', xml)
|
pieces = re.findall('<sgm no="(\d+)".*fs="(\d+)"', xml)
|
||||||
numbers, fs = zip(*pieces)
|
numbers, fs = zip(*pieces)
|
||||||
urls = ['http://%s:%s/%s/%s?key=%s' % (host, port, i, rid, key) for i in numbers]
|
urls = ['http://%s/%s/%s?k=%s' % (host, i, rid, key) for i in numbers]
|
||||||
urls = ['http://pptv.vod.lxdns.com/%s/%s?key=%s' % (i, rid, key) for i in numbers]
|
|
||||||
total_size = sum(map(int, fs))
|
total_size = sum(map(int, fs))
|
||||||
assert rid.endswith('.mp4')
|
assert rid.endswith('.mp4')
|
||||||
|
|
||||||
|
src/you_get/downloader/sina.py
@@ -1,20 +1,22 @@
 #!/usr/bin/env python
 
-__all__ = ['sina_download', 'sina_download_by_id']
+__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']
 
 from ..common import *
 
-import re
-
 def video_info(id):
-    xml = get_decoded_html('http://v.iask.com/v_play.php?vid=%s' % id)
+    xml = get_content('http://v.iask.com/v_play.php?vid=%s' % id, decoded=True)
     urls = re.findall(r'<url>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</url>', xml)
-    name = r1(r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>', xml)
-    vstr = r1(r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>', xml)
+    name = match1(xml, r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>')
+    vstr = match1(xml, r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>')
     return urls, name, vstr
 
-def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
-    urls, name, vstr = video_info(id)
+def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
+    """Downloads a Sina video by its unique vid.
+    http://video.sina.com.cn/
+    """
+
+    urls, name, vstr = video_info(vid)
     title = title or name
     assert title
     size = 0

@@ -26,11 +28,36 @@ def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
     if not info_only:
         download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)
 
-def sina_download(url, output_dir = '.', merge = True, info_only = False):
-    id = r1(r'[^_]vid\s*:\s*\'([^\']+)\',', get_html(url)).split('|')[-1]
-    assert id
+def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False):
+    """Downloads a Sina video by its unique vkey.
+    http://video.sina.com/
+    """
+
+    url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey
+    type, ext, size = url_info(url)
+
+    print_info(site_info, title, 'flv', size)
+    if not info_only:
+        download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)
 
-    sina_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
+def sina_download(url, output_dir='.', merge=True, info_only=False):
+    """Downloads Sina videos by URL.
+    """
+
+    vid = match1(url, r'vid=(\d+)')
+    if vid is None:
+        video_page = get_content(url)
+        vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
+        if hd_vid == '0':
+            vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|')
+            vid = vids[-1]
+
+    if vid:
+        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
+    else:
+        vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
+        title = match1(video_page, r'title\s*:\s*"([^"]+)"')
+        sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
 site_info = "Sina.com"
 download = sina_download
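The new sina_download() dispatch in sketch form (the URL shapes are hypothetical): a vid in the query string is used directly; otherwise the page is fetched, an hd_vid of '0' falls back to the pipe-separated vid list, and only when no usable vid exists does the vkey path run.

    # ?vid=123 in the URL            -> sina_download_by_vid('123')
    # page carries hd_vid: '456'     -> sina_download_by_vid('456')
    # hd_vid '0', vid 'a|b|789'      -> sina_download_by_vid('789')
    # no vid found at all            -> sina_download_by_vkey(vkey, title=...)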
src/you_get/downloader/sohu.py
@@ -8,7 +8,7 @@ import json
 
 def real_url(host, prot, file, new):
     url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
-    start, _, host, key, _, _ = get_html(url).split('|')
+    start, _, host, key = get_html(url).split('|')[:4]
     return '%s%s?key=%s' % (start[:-1], new, key)
 
 def sohu_download(url, output_dir = '.', merge = True, info_only = False):
src/you_get/downloader/ted.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+__all__ = ['ted_download']
+
+from ..common import *
+
+def ted_download(url, output_dir = '.', merge = True, info_only = False):
+    page = get_html(url).split("\n")
+    for line in page:
+        if line.find("<title>") > -1:
+            title = line.replace("<title>", "").replace("</title>", "").replace("\t", "")
+            title = title[:title.find(' | ')]
+        if line.find("no-flash-video-download") > -1:
+            url = line.replace('<a id="no-flash-video-download" href="', "").replace(" ", "").replace("\t", "").replace(".mp4", "-480p-en.mp4")
+            url = url[:url.find('"')]
+            type, ext, size = url_info(url)
+            print_info(site_info, title, type, size)
+            if not info_only:
+                download_urls([url], title, ext, size, output_dir, merge=merge)
+            break
+
+site_info = "ted.com"
+download = ted_download
+download_playlist = playlist_not_supported('ted')
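
Note on the new ted.py extractor above: it is a plain line scanner that trims the `<title>` tag for the video title and rewrites the no-flash download link to a 480p English rendition. The same string surgery on a fabricated page line:

    line = '\t<a id="no-flash-video-download" href="http://download.ted.com/talks/Talk.mp4">'
    url = line.replace('<a id="no-flash-video-download" href="', '')
    url = url.replace(' ', '').replace('\t', '').replace('.mp4', '-480p-en.mp4')
    url = url[:url.find('"')]  # drop the trailing quote and bracket
    print(url)  # -> http://download.ted.com/talks/Talk-480p-en.mp4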
@@ -5,26 +5,31 @@ __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id',
 
 from ..common import *
 
 def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
-    xml = get_html('http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99')
+    data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
+    vids = []
+    for k in data:
+        if len(data[k]) == 1:
+            vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]})
+
+    temp = max(vids, key=lambda x:x["size"])
+    vid, size = temp["k"], temp["size"]
+
+    xml = get_html('http://ct.v2.tudou.com/f?id=%s' % vid)
     from xml.dom.minidom import parseString
     doc = parseString(xml)
-    title = title or doc.firstChild.getAttribute('tt') or doc.firstChild.getAttribute('title')
-    urls = [(int(n.getAttribute('brt')), n.firstChild.nodeValue.strip()) for n in doc.getElementsByTagName('f')]
-
-    url = max(urls, key = lambda x:x[0])[1]
-    assert 'f4v' in url
-
-    type, ext, size = url_info(url)
-
-    print_info(site_info, title, type, size)
+    url = [n.firstChild.nodeValue.strip() for n in doc.getElementsByTagName('f')][0]
+    ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', url)
+
+    print_info(site_info, title, ext, size)
+
     if not info_only:
-        #url_save(url, filepath, bar):
-        download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
+        download_urls([url], title, ext, size, output_dir = output_dir, merge = merge)
 
 def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://www.tudou.com/programs/view/%s/' % id)
 
     iid = r1(r'iid\s*[:=]\s*(\S+)', html)
+    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
     tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
 def tudou_download(url, output_dir = '.', merge = True, info_only = False):
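
Note on the tudou.py hunk above: the new code queries the `getItemSegs` JSON, keeps only single-segment entries, and picks the one with the largest size. That selection step in isolation (sample data is invented):

    data = {"1": [{"k": 111, "size": 10}],
            "3": [{"k": 333, "size": 95}],
            "5": [{"k": 555, "size": 40}, {"k": 556, "size": 41}]}  # multi-segment: skipped
    vids = [{"k": v[0]["k"], "size": v[0]["size"]} for v in data.values() if len(v) == 1]
    best = max(vids, key=lambda x: x["size"])
    print(best["k"], best["size"])  # -> 333 95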
@@ -10,7 +10,9 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
     html = parse.unquote(html).replace('\/', '/')
 
-    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html))
+    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
+                          r1(r'<meta property="og:description" content="([^"]*)" />', html) or
+                          r1(r'<title>([^<\n]*)', html)).replace('\n', '')
     real_url = r1(r'source src=\\x22([^\\]+)\\', html)
     if not real_url:
         real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
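
Note on the tumblr.py hunk above: the title lookup now cascades through three patterns with `or`, so the first one that matches wins and a trailing newline from `<title>` is stripped. The same idiom in isolation (page snippet and helper are stand-ins):

    import re

    def r1_like(pattern, text):
        # Stand-in for you-get's r1: first capture group, or None.
        m = re.search(pattern, text)
        return m.group(1) if m else None

    page = '<title>Fallback Title\n</title>'
    title = (r1_like(r'<meta property="og:title" content="([^"]*)" />', page) or
             r1_like(r'<meta property="og:description" content="([^"]*)" />', page) or
             r1_like(r'<title>([^<\n]*)', page)).replace('\n', '')
    print(title)  # -> Fallback Title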
@@ -55,11 +55,14 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
     if not ext:
         ext = 'mp3'
 
-    print_info(site_info, song_title, type, size)
+    print_info(site_info, song_title, ext, size)
     if not info_only:
         file_name = "%s - %s - %s" % (song_title, album_name, artist)
         download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
-        xiami_download_lyric(lrc_url, file_name, output_dir)
+        try:
+            xiami_download_lyric(lrc_url, file_name, output_dir)
+        except:
+            pass
 
 def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://www.xiami.com/song/showcollect/id/' + cid, faker = True)
@@ -84,7 +87,10 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only =
         if not info_only:
             file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name)
             download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
-            xiami_download_lyric(lrc_url, file_name, output_dir)
+            try:
+                xiami_download_lyric(lrc_url, file_name, output_dir)
+            except:
+                pass
 
         track_nr += 1
 
@@ -112,7 +118,10 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
         if not info_only:
             file_name = "%02d.%s" % (track_nr, song_title)
             download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
-            xiami_download_lyric(lrc_url, file_name, output_dir)
+            try:
+                xiami_download_lyric(lrc_url, file_name, output_dir)
+            except:
+                pass
         if not pic_exist:
             xiami_download_pic(pic_url, 'cover', output_dir)
             pic_exist = True
@@ -132,6 +141,10 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
         id = r1(r'http://www.xiami.com/song/(\d+)', url)
         xiami_download_song(id, output_dir, merge, info_only)
 
+    if re.match('http://www.xiami.com/song/detail/id/\d+', url):
+        id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url)
+        xiami_download_song(id, output_dir, merge, info_only)
+
 site_info = "Xiami.com"
 download = xiami_download
 download_playlist = playlist_not_supported("xiami")
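
Note on the xiami.py hunks above: all three download paths now treat the lyric fetch as best-effort, so a lyric failure no longer aborts an already-successful audio download. The guard in isolation (the failing body is a stand-in):

    def xiami_download_lyric(lrc_url, file_name, output_dir):
        raise IOError("lyric server unreachable")  # stand-in failure

    try:
        xiami_download_lyric("http://example.invalid/song.lrc", "01.Song", ".")
    except:
        pass  # lyrics are optional; the audio download already succeeded
    print("continuing with next track")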
@@ -20,10 +20,10 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i
         download_urls([url], title, ext, size, output_dir, merge = merge)
 
 def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
-    id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url)
+    id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url)
     assert id
     html = get_html(url, 'utf-8')
-    title = r1(r'<meta property="og:title" content="([^"]+)"/>', html)
+    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html)
     assert title
     title = parse.unquote(title)
     title = escape_file_path(title)
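
Note on the yinyuetai.py hunk above: the id pattern now accepts any subdomain, and the title pattern tolerates arbitrary whitespace before `content=`. A quick check of the loosened id pattern (sample URLs are fabricated):

    import re
    for u in ['http://www.yinyuetai.com/video/481621',
              'http://hc.yinyuetai.com/video/481621']:
        print(re.search(r'http://\w+.yinyuetai.com/video/(\d+)$', u).group(1))
    # prints 481621 twice; the old www-only pattern rejected the second URL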
@@ -25,7 +25,7 @@ def find_video_id_from_url(url):
     return r1_of(patterns, url)
 
 def find_video_id_from_show_page(url):
-    return re.search(r'<div class="btnplay">.*href="([^"]+)"', get_html(url)).group(1)
+    return re.search(r'<a class="btnShow btnplay.*href="([^"]+)"', get_html(url)).group(1)
 
 def youku_url(url):
     id = find_video_id_from_url(url)
@@ -61,7 +61,7 @@ def parse_video_title(url, page):
 
 def parse_playlist_title(url, page):
     if re.search(r'v_playlist', url):
-        # if we are playing a viedo from play list, the meta title might be incorrect
+        # if we are playing a video from play list, the meta title might be incorrect
         title = re.search(r'<title>([^<>]*)</title>', page).group(1)
     else:
         title = re.search(r'<meta name="title" content="([^"]*)"', page).group(1)
@@ -80,7 +80,7 @@ def parse_page(url):
     return id2, title
 
 def get_info(videoId2):
-    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
+    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2 + '/timezone/+08/version/5/source/out/Sc/2'))
 
 def find_video(info, stream_type = None):
     #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5)
@@ -120,28 +120,16 @@ def find_video(info, stream_type = None):
 def file_type_of_url(url):
     return str(re.search(r'/st/([^/]+)/', url).group(1))
 
-def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    info = get_info(id2)
+def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
+    info = get_info(id)
     urls, sizes = zip(*find_video(info, stream_type))
     ext = file_type_of_url(urls[0])
     total_size = sum(sizes)
 
-    urls = url_locations(urls) # Use real (redirected) URLs for resuming of downloads
-
     print_info(site_info, title, ext, total_size)
     if not info_only:
         download_urls(urls, title, ext, total_size, output_dir, merge = merge)
 
-def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    if not youku_url(url):
-        youku_download_playlist(url, output_dir, merge, info_only)
-        return
-
-    id2, title = parse_page(url)
-    title = title.replace('?', '-')
-
-    youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only)
-
 def parse_playlist_videos(html):
     return re.findall(r'id="A_(\w+)"', html)
 
@@ -175,9 +163,9 @@ def parse_vplaylist(url):
     n = int(re.search(r'<span class="num">(\d+)</span>', get_html(url)).group(1))
     return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)]
 
-def youku_download_playlist(url, output_dir = '.', merge = True, info_only = False):
-    if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
-        url = find_video_id_from_show_page(url)
+def youku_download_playlist(url, output_dir='.', merge=True, info_only=False):
+    """Downloads a Youku playlist.
+    """
+
     if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url):
         ids = parse_vplaylist(url)
@@ -185,21 +173,36 @@ def youku_download_playlist(url, output_dir = '.', merge = True, info_only = Fal
         ids = parse_vplaylist(url)
     elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url):
         ids = parse_vplaylist(url)
-    else:
+    elif re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
+        url = find_video_id_from_show_page(url)
         assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
         ids = parse_playlist(url)
+    else:
+        ids = []
+    assert ids != []
 
     title = parse_playlist_title(url, get_html(url))
-    title = title.replace('?', '-')
+    title = filenameable(title)
     output_dir = os.path.join(output_dir, title)
 
     for i, id in enumerate(ids):
+        print('Processing %s of %s videos...' % (i + 1, len(ids)))
         try:
-            print('Processing %s of %s videos...' % (i + 1, len(ids)))
-            youku_download(id, output_dir, merge = merge, info_only = info_only)
+            id, title = parse_page(youku_url(id))
+            youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
         except:
             continue
 
+def youku_download(url, output_dir='.', merge=True, info_only=False):
+    """Downloads Youku videos by URL.
+    """
+
+    try:
+        youku_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
+    except:
+        id, title = parse_page(url)
+        youku_download_by_id(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+
 site_info = "Youku.com"
 download = youku_download
 download_playlist = youku_download_playlist
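
Note on the youku.py hunks above: the control flow is inverted. `youku_download` now always tries the playlist route first; for a non-playlist URL, `youku_download_playlist` ends up with `ids = []` and a failing assert, and the except branch downloads the single video instead. That shape in miniature, with stubbed functions:

    def download_playlist(url):
        ids = []  # pretend none of the playlist patterns matched
        assert ids != [], 'not a playlist'

    def download_single(url):
        print('downloading single video:', url)

    url = 'http://v.youku.com/v_show/id_XXXXXXXX.html'  # made-up id
    try:
        download_playlist(url)
    except:
        download_single(url)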
@@ -6,7 +6,7 @@ from ..common import *
 
 # YouTube media encoding options, in descending quality order.
 # taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
-youtube_codecs = [
+yt_codecs = [
     {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
     {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
     {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
@@ -32,102 +32,70 @@ youtube_codecs = [
     {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
 ]
 
-def parse_video_info(raw_info):
-    """Parser for YouTube's get_video_info data.
-    Returns a dict, where 'url_encoded_fmt_stream_map' maps to a sorted list.
+def decipher(js, s):
+    def tr_js(code):
+        code = re.sub(r'function', r'def', code)
+        code = re.sub(r'\{', r':\n\t', code)
+        code = re.sub(r'\}', r'\n', code)
+        code = re.sub(r'var\s+', r'', code)
+        code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
+        code = re.sub(r'(\w+).length', r'len(\1)', code)
+        code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
+        code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
+        code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
+        return code
+
+    f1 = match1(js, r'g.sig\|\|(\w+)\(g.s\)')
+    f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1)
+    code = tr_js(f1def)
+    f2 = match1(f1def, r'(\w+)\(\w+,\d+\)')
+    if f2 is not None:
+        f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2)
+        code = code + 'global %s\n' % f2 + tr_js(f2def)
+
+    code = code + 'sig=%s(s)' % f1
+    exec(code, globals(), locals())
+    return locals()['sig']
+
+def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
+    """Downloads a YouTube video by its unique id.
     """
 
-    # Percent-encoding reserved characters, used as separators.
-    sepr = {
-        '&': '%26',
-        ',': '%2C',
-        '=': '%3D',
+    raw_video_info = get_content('http://www.youtube.com/get_video_info?video_id=%s' % id)
+    video_info = parse.parse_qs(raw_video_info)
+
+    if video_info['status'] == ['ok'] and ('use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']):
+        title = parse.unquote_plus(video_info['title'][0])
+        stream_list = parse.parse_qs(raw_video_info)['url_encoded_fmt_stream_map'][0].split(',')
+
+    else:
+        # Parse video page when video_info is not usable.
+        video_page = get_content('http://www.youtube.com/watch?v=%s' % id)
+        ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+);'))
+
+        title = ytplayer_config['args']['title']
+        stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
+
+        html5player = ytplayer_config['assets']['js']
+
+    streams = {
+        parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream)
+            for stream in stream_list
     }
 
-    # fmt_level = {'itag': level, ...}
-    # itag of a higher quality maps to a lower level number.
-    # The highest quality has level number 0.
-    fmt_level = dict(
-        zip(
-            [str(codec['itag'])
-                for codec in
-                    youtube_codecs],
-            range(len(youtube_codecs))))
-
-    # {key1: value1, key2: value2, ...,
-    #   'url_encoded_fmt_stream_map': [{'itag': '38', ...}, ...]
-    # }
-    return dict(
-        [(lambda metadata:
-            ['url_encoded_fmt_stream_map', (
-                lambda stream_map:
-                    sorted(
-                        [dict(
-                            [subitem.split(sepr['='])
-                                for subitem in
-                                    item.split(sepr['&'])])
-                            for item in
-                                stream_map.split(sepr[','])],
-                        key =
-                            lambda stream:
-                                fmt_level[stream['itag']]))
-                (metadata[1])]
-            if metadata[0] == 'url_encoded_fmt_stream_map'
-            else metadata)
-        (item.split('='))
-            for item in
-                raw_info.split('&')])
-
-def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
-
-    raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8')
-
-    video_info = parse_video_info(raw_info)
-
-    if video_info['status'] == 'ok': # use get_video_info data
-
-        title = parse.unquote(video_info['title'].replace('+', ' '))
-
-        signature = video_info['url_encoded_fmt_stream_map'][0]['sig']
-        url = parse.unquote(parse.unquote(video_info['url_encoded_fmt_stream_map'][0]['url'])) + "&signature=%s" % signature
-
-    else: # parse video page when "embedding disabled by request"
-
-        import json
-        html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
-        html = unescape_html(html)
-        yt_player_config = json.loads(r1(r'ytplayer.config = ([^\n]+);', html))
-        title = yt_player_config['args']['title']
-        title = unicodize(title)
-        title = parse.unquote(title)
-        title = escape_file_path(title)
-
-        for itag in [
-            '38',
-            '46', '37',
-            '102', '45', '22',
-            '84',
-            '120',
-            '85',
-            '44', '35',
-            '101', '100', '43', '34', '82', '18',
-            '6', '83', '13', '5', '36', '17',
-        ]:
-            fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html)
-            if fmt:
-                url = r1(r'url=([^\\]+)', fmt)
-                url = unicodize(url)
-                url = parse.unquote(url)
-                sig = r1(r'sig=([^\\]+)', fmt)
-                url = url + '&signature=' + sig
-                break
-        try:
-            url
-        except NameError:
-            url = r1(r'ytdns.ping\("([^"]+)"[^;]*;</script>', html)
-            url = unicodize(url)
-            url = re.sub(r'\\/', '/', url)
-            url = re.sub(r'generate_204', 'videoplayback', url)
+    for codec in yt_codecs:
+        itag = str(codec['itag'])
+        if itag in streams:
+            download_stream = streams[itag]
+            break
+
+    url = download_stream['url'][0]
+    if 'sig' in download_stream:
+        sig = download_stream['sig'][0]
+    else:
+        js = get_content(html5player)
+        sig = decipher(js, download_stream['s'][0])
+    url = '%s&signature=%s' % (url, sig)
 
     type, ext, size = url_info(url)
 
@@ -135,13 +103,14 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
     if not info_only:
         download_urls([url], title, ext, size, output_dir, merge = merge)
 
-def youtube_download(url, output_dir = '.', merge = True, info_only = False):
-    id = r1(r'youtu.be/(.*)', url)
-    if not id:
-        id = parse.parse_qs(parse.urlparse(url).query)['v'][0]
+def youtube_download(url, output_dir='.', merge=True, info_only=False):
+    """Downloads YouTube videos by URL.
+    """
+
+    id = match1(url, r'youtu.be/([^/]+)') or parse_query_param(url, 'v')
     assert id
 
-    youtube_download_by_id(id, None, output_dir, merge = merge, info_only = info_only)
+    youtube_download_by_id(id, title=None, output_dir=output_dir, merge=merge, info_only=info_only)
 
 site_info = "YouTube.com"
 download = youtube_download
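
Note on the youtube.py hunk above: `decipher` does not reimplement YouTube's signature scrambling; it regex-translates the player's own JavaScript into Python and `exec`s the result. A toy run of the same `tr_js` substitutions on an invented signature function:

    import re

    def tr_js(code):
        # Same substitutions as the diff: JS syntax -> Python syntax.
        code = re.sub(r'function', r'def', code)
        code = re.sub(r'\{', r':\n\t', code)
        code = re.sub(r'\}', r'\n', code)
        code = re.sub(r'var\s+', r'', code)
        code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
        code = re.sub(r'(\w+).length', r'len(\1)', code)
        code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
        code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
        code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
        return code

    # Invented JS body; real player code is more involved.
    js = 'function mo(a){a=a.split("");a=a.reverse();a=a.slice(2);return a.join("")}'
    scope = {}
    exec(tr_js(js), scope)            # defines mo() in scope
    print(scope['mo']('0123456789'))  # -> 76543210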
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 
 __all__ = ['__version__', '__date__']
 
-__version__ = '0.3.12'
-__date__ = '2013-05-19'
+__version__ = '0.3.21'
+__date__ = '2013-08-17'
@@ -4,7 +4,7 @@
 import unittest
 
 from you_get import *
-from you_get.__main__ import url_to_module
+from you_get.downloader.__main__ import url_to_module
 
 def test_urls(urls):
     for url in urls:
@@ -17,11 +17,6 @@ class YouGetTests(unittest.TestCase):
             "http://www.freesound.org/people/Corsica_S/sounds/184419/",
         ])
 
-    def test_jpopsuki(self):
-        test_urls([
-            #"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
-        ])
-
     def test_mixcloud(self):
         test_urls([
             "http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/",
tests/test_common.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+import unittest
+
+from you_get import *
+
+class TestCommon(unittest.TestCase):
+
+    def test_match1(self):
+        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A')
+        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be'])
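
Note on the new test above: it pins down `match1`'s two behaviors, one pattern returning a single first group and several patterns returning a list of first groups. A minimal implementation consistent with these assertions (the real helper lives in you_get.common; this is only a sketch):

    import re

    def match1(text, *patterns):
        if len(patterns) == 1:
            m = re.search(patterns[0], text)
            return m.group(1) if m else None
        # Several patterns: collect the first group of each one that matches.
        ret = []
        for p in patterns:
            m = re.search(p, text)
            if m:
                ret.append(m.group(1))
        return ret

    assert match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)') == '1234567890A'
    assert match1('http://youtu.be/1234567890A',
                  r'youtu.be/([^/]+)', r'youtu.(\w+)') == ['1234567890A', 'be']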
you-get
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 
 import os, sys
-sys.path.insert(0, os.path.join((os.path.dirname(os.path.realpath(__file__))), "src"))
-
-from you_get import *
+__path__ = os.path.dirname(os.path.realpath(__file__))
+__srcdir__ = 'src'
+sys.path.insert(1, os.path.join(__path__, __srcdir__))
+from you_get.downloader import main
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()
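
Note on the launcher hunk above: the script now builds the module search path explicitly and imports only the package entry point instead of star-importing the whole package. The same bootstrap pattern for any script run against an uninstalled src/ tree (package name here is hypothetical):

    #!/usr/bin/env python3
    import os, sys

    # Make the in-tree package importable before anything imports it.
    _here = os.path.dirname(os.path.realpath(__file__))
    sys.path.insert(1, os.path.join(_here, 'src'))

    if __name__ == '__main__':
        from mypkg import main  # hypothetical package living in src/mypkg
        main()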
@@ -31,6 +31,6 @@
     ],
 
     "console_scripts": [
-        "you-get = you_get.__main__:main"
+        "you-get = you_get.downloader.__main__:main"
     ]
 }
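
Note on the hunk above: the entry point follows setuptools' `name = package.module:function` form, so moving `__main__` under `you_get.downloader` requires updating the console script to match. For reference, the generic shape of such a declaration in a plain setup.py (project and package names are hypothetical):

    from setuptools import setup

    setup(
        name='example-tool',  # hypothetical project
        packages=['example_tool'],
        entry_points={
            'console_scripts': [
                # name-on-PATH = dotted.module:callable
                'example-tool = example_tool.__main__:main',
            ],
        },
    )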