Mirror of https://github.com/soimort/you-get.git (synced 2025-02-03 00:33:58 +03:00)
Commit 6ec99038e0

.gitignore (vendored) | 1

@@ -11,6 +11,7 @@ _*/
*.3gp
*.asf
*.flv
*.lrc
*.mkv
*.mp3
*.mp4

@@ -1,6 +1,93 @@
Changelog
=========

0.3.21
------

*Date: 2013-08-17*

* Fix issues for:
    - YouTube
    - YinYueTai
    - pan.baidu.com

0.3.20
------

*Date: 2013-08-16*

* Add support for:
    - eHow
    - Khan Academy
    - TED
    - 5sing
* Fix issues for:
    - Tudou

0.3.18
------

*Date: 2013-07-19*

* Fix issues for:
    - Dailymotion
    - Youku
    - Sina
    - AcFun
    - bilibili

0.3.17
------

*Date: 2013-07-12*

* Fix issues for:
    - YouTube
    - 163
    - bilibili
* Code cleanup.

0.3.16
------

*Date: 2013-06-28*

* Fix issues for:
    - YouTube
    - Sohu
    - Google+ (enable HTTPS proxy)

0.3.15
------

*Date: 2013-06-21*

* Add support for:
    - Instagram

0.3.14
------

*Date: 2013-06-14*

* Add support for:
    - Alive.in.th
* Remove support of:
    - JPopsuki
* Fix issues for:
    - AcFun
    - iQIYI

0.3.13
------

*Date: 2013-06-07*

* Add support for:
    - Baidu Wangpan (video only)
* Fix issue for:
    - Google+

0.3.12
------

@@ -86,7 +173,7 @@ Changelog
* Add support for:
    - Douban
    - MioMio
* Fix issue for:
* Fix issues for:
    - Tudou
    - Vimeo

README.md | 18

@@ -17,15 +17,18 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
* Coursera <https://www.coursera.org>
* Blip <http://blip.tv>
* Dailymotion <http://dailymotion.com>
* eHow <http://www.ehow.com>
* Facebook <http://facebook.com>
* Google+ <http://plus.google.com>
* Google Drive <http://docs.google.com>
* Khan Academy <http://www.khanacademy.org>
* TED <http://www.ted.com>
* Tumblr <http://www.tumblr.com>
* Vine <http://vine.co>
* Instagram <http://instagram.com>
* SoundCloud <http://soundcloud.com>
* Mixcloud <http://www.mixcloud.com>
* Freesound <http://www.freesound.org>
* JPopsuki <http://jpopsuki.tv>
* VID48 <http://vid48.com>
* Niconico (ニコニコ動画) <http://www.nicovideo.jp>
* Youku (优酷) <http://www.youku.com>

@@ -47,8 +50,11 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
* Sohu (搜狐视频) <http://tv.sohu.com>
* 56 (56网) <http://www.56.com>
* Xiami (虾米) <http://www.xiami.com>
* Baidu (百度音乐) <http://music.baidu.com>
* 5sing <http://www.5sing.com>
* Baidu Music (百度音乐) <http://music.baidu.com>
* Baidu Wangpan (百度网盘) <http://pan.baidu.com>
* SongTaste <http://www.songtaste.com>
* Alive.in.th <http://alive.in.th>

## Dependencies

@@ -233,15 +239,18 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y
* Coursera <https://www.coursera.org>
* Blip <http://blip.tv>
* Dailymotion <http://dailymotion.com>
* eHow <http://www.ehow.com>
* Facebook <http://facebook.com>
* Google+ <http://plus.google.com>
* Google Drive <http://docs.google.com>
* Khan Academy <http://www.khanacademy.org>
* TED <http://www.ted.com>
* Tumblr <http://www.tumblr.com>
* Vine <http://vine.co>
* Instagram <http://instagram.com>
* SoundCloud <http://soundcloud.com>
* Mixcloud <http://www.mixcloud.com>
* Freesound <http://www.freesound.org>
* JPopsuki <http://jpopsuki.tv>
* VID48 <http://vid48.com>
* NICONICO动画 <http://www.nicovideo.jp>
* 优酷 <http://www.youku.com>

@@ -263,8 +272,11 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y
* 搜狐视频 <http://tv.sohu.com>
* 56网 <http://www.56.com>
* 虾米 <http://www.xiami.com>
* 5sing <http://www.5sing.com>
* 百度音乐 <http://music.baidu.com>
* 百度网盘 <http://pan.baidu.com>
* SongTaste <http://www.songtaste.com>
* Alive.in.th <http://alive.in.th>

## 依赖

README.txt | 10

@@ -20,15 +20,18 @@ Supported Sites (As of Now)
* Coursera https://www.coursera.org
* Blip http://blip.tv
* Dailymotion http://dailymotion.com
* eHow http://www.ehow.com
* Facebook http://facebook.com
* Google+ http://plus.google.com
* Google Drive http://docs.google.com
* Khan Academy http://www.khanacademy.org
* TED http://www.ted.com
* Tumblr http://www.tumblr.com
* Vine http://vine.co
* Instagram http://instagram.com
* SoundCloud http://soundcloud.com
* Mixcloud http://www.mixcloud.com
* Freesound http://www.freesound.org
* JPopsuki http://jpopsuki.tv
* VID48 http://vid48.com
* Niconico (ニコニコ動画) http://www.nicovideo.jp
* Youku (优酷) http://www.youku.com

@@ -50,8 +53,11 @@ Supported Sites (As of Now)
* Sohu (搜狐视频) http://tv.sohu.com
* 56 (56网) http://www.56.com
* Xiami (虾米) http://www.xiami.com
* Baidu (百度音乐) http://music.baidu.com
* 5sing http://www.5sing.com
* Baidu Music (百度音乐) http://music.baidu.com
* Baidu Wangpan (百度网盘) http://pan.baidu.com
* SongTaste http://www.songtaste.com
* Alive.in.th http://alive.in.th

Dependencies
------------

src/you_get/__init__.py

@@ -1,9 +1,9 @@
#!/usr/bin/env python

from .processor import *

from .downloader import *

from .version import *
from .common import *
from .__main__ import *
from .version import *

# Easy import
#from .cli_wrapper.converter import *
#from .cli_wrapper.player import *
from .downloader import *

src/you_get/common.py

@@ -7,6 +7,7 @@ import os
import re
import sys
from urllib import request, parse
import platform

from .version import __version__

@@ -33,20 +34,63 @@ def tr(s):
    except:
        return str(s.encode('utf-8'))[2:-1]

# DEPRECATED in favor of match1()
def r1(pattern, text):
    m = re.search(pattern, text)
    if m:
        return m.group(1)

# DEPRECATED in favor of match1()
def r1_of(patterns, text):
    for p in patterns:
        x = r1(p, text)
        if x:
            return x

def match1(text, *patterns):
    """Scans through a string for substrings matched some patterns (first-subgroups only).

    Args:
        text: A string to be scanned.
        patterns: Arbitrary number of regex patterns.

    Returns:
        When only one pattern is given, returns a string (None if no match found).
        When more than one pattern are given, returns a list of strings ([] if no match found).
    """

    if len(patterns) == 1:
        pattern = patterns[0]
        match = re.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return None
    else:
        ret = []
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                ret.append(match.group(1))
        return ret

def parse_query_param(url, param):
    """Parses the query string of a URL and returns the value of a parameter.

    Args:
        url: A URL.
        param: A string representing the name of the parameter.

    Returns:
        The value of the parameter.
    """

    return parse.parse_qs(parse.urlparse(url).query)[param][0]

def unicodize(text):
    return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)

# DEPRECATED in favor of filenameable()
def escape_file_path(path):
    path = path.replace('/', '-')
    path = path.replace('\\', '-')
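
Usage sketch for the new helpers — the inputs below are illustrative, not taken from the diff:

    page = "var vid : '75747512', title : 'demo'"
    match1(page, r"vid\s*:\s*'(\d+)'")                          # -> '75747512' (one pattern: str or None)
    match1(page, r"vid\s*:\s*'(\d+)'", r"title\s*:\s*'(\w+)'")  # -> ['75747512', 'demo'] (several patterns: list)
    parse_query_param('http://v.example.com/play?vid=123&hd=1', 'vid')  # -> '123'
    unicodize('\\u4f60\\u597d')                                 # -> '你好' (decodes literal \uXXXX escapes)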

@@ -54,23 +98,57 @@ def escape_file_path(path):
    path = path.replace('?', '-')
    return path

def filenameable(text):
    """Converts a string to a legal filename through various OSes.
    """
    # All POSIX systems
    text = text.translate({
        0: None,
        ord('/'): '-',
    })
    if platform.system() == 'Darwin': # For Mac OS
        text = text.translate({
            ord(':'): '-',
        })
    elif platform.system() == 'Windows': # For Windows
        text = text.translate({
            ord(':'): '-',
            ord('*'): '-',
            ord('?'): '-',
            ord('\\'): '-',
            ord('\"'): '\'',
            ord('<'): '-',
            ord('>'): '-',
            ord('|'): '-',
            ord('+'): '-',
            ord('['): '(',
            ord(']'): ')',
        })
    return text

def unescape_html(html):
    from html import parser
    html = parser.HTMLParser().unescape(html)
    html = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), html)
    return html

def ungzip(s):
def ungzip(data):
    """Decompresses data for Content-Encoding: gzip.
    """
    from io import BytesIO
    import gzip
    buffer = BytesIO(s)
    f = gzip.GzipFile(fileobj = buffer)
    buffer = BytesIO(data)
    f = gzip.GzipFile(fileobj=buffer)
    return f.read()

def undeflate(s):
def undeflate(data):
    """Decompresses data for Content-Encoding: deflate.
    (the zlib compression is used.)
    """
    import zlib
    return zlib.decompress(s, -zlib.MAX_WBITS)
    return zlib.decompress(data, -zlib.MAX_WBITS)

# DEPRECATED in favor of get_content()
def get_response(url, faker = False):
    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
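
A sketch of what filenameable() and ungzip() do — inputs invented, outputs assume the named platform:

    filenameable('AC/DC: Live')   # POSIX:   'AC-DC: Live' (only NUL and '/' rewritten)
                                  # Darwin:  'AC-DC- Live' (':' rewritten too)
                                  # Windows: 'AC-DC- Live' (':*?\"<>|+[] all rewritten)
    import gzip
    ungzip(gzip.compress(b'data'))   # -> b'data' (round-trip for Content-Encoding: gzip)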

@@ -85,10 +163,12 @@ def get_response(url, faker = False):
    response.data = data
    return response

# DEPRECATED in favor of get_content()
def get_html(url, encoding = None, faker = False):
    content = get_response(url, faker).data
    return str(content, 'utf-8', 'ignore')

# DEPRECATED in favor of get_content()
def get_decoded_html(url, faker = False):
    response = get_response(url, faker)
    data = response.data

@@ -98,6 +178,38 @@ def get_decoded_html(url, faker = False):
    else:
        return data

def get_content(url, headers={}, decoded=True):
    """Gets the content of a URL via sending a HTTP GET request.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.

    Returns:
        The content as a string.
    """

    response = request.urlopen(request.Request(url, headers=headers))
    data = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    # Decode the response body
    if decoded:
        charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
        if charset is not None:
            data = data.decode(charset)
        else:
            data = data.decode('utf-8')

    return data

def url_size(url, faker = False):
    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
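
get_content() supersedes the deprecated get_html()/get_decoded_html(); a usage sketch with hypothetical URLs:

    html = get_content('http://www.example.com/video/123',
                       headers={'User-Agent': 'Mozilla/5.0'})        # str; gzip/deflate transparently undone
    raw = get_content('http://www.example.com/feed', decoded=False)  # bytes, left undecoded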

@@ -136,7 +248,7 @@ def url_info(url, faker = False):
    type = None
    if headers['content-disposition']:
        try:
            filename = parse.unquote(r1(r'filename="?(.+)"?', headers['content-disposition']))
            filename = parse.unquote(r1(r'filename="?([^"]+)"?', headers['content-disposition']))
            if len(filename.split('.')) > 1:
                ext = filename.split('.')[-1]
            else:

@@ -388,7 +500,9 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
            import sys
            traceback.print_exc(file = sys.stdout)
            pass
    title = escape_file_path(title)

    title = filenameable(title)

    filename = '%s.%s' % (title, ext)
    filepath = os.path.join(output_dir, filename)
    if total_size:

@@ -437,19 +551,18 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,

    elif ext == 'mp4':
        try:
            from .processor.join_mp4 import concat_mp4
            concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
            for part in parts:
                os.remove(part)
        except:
            from .processor.ffmpeg import has_ffmpeg_installed
            if has_ffmpeg_installed():
                from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
                ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
            else:
                from .processor.join_mp4 import concat_mp4
                concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
        except:
            raise
        else:
            for part in parts:
                os.remove(part)
        else:
            print('No ffmpeg is found. Merging aborted.')

    else:
        print("Can't merge %s files" % ext)

@@ -463,7 +576,9 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
        return

    assert ext in ('ts')
    title = escape_file_path(title)

    title = filenameable(title)

    filename = '%s.%s' % (title, 'ts')
    filepath = os.path.join(output_dir, filename)
    if total_size:

@@ -597,9 +712,7 @@ def set_http_proxy(proxy):
    elif proxy == '': # Don't use any proxy
        proxy_support = request.ProxyHandler({})
    else: # Use proxy
        if not proxy.startswith('http://'):
            proxy = 'http://' + proxy
        proxy_support = request.ProxyHandler({'http': '%s' % proxy})
        proxy_support = request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy})
    opener = request.build_opener(proxy_support)
    request.install_opener(opener)
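
With the added 'https' mapping, a single call now proxies both schemes — this is what the changelog's "Google+ (enable HTTPS proxy)" refers to. Illustrative call (address invented):

    set_http_proxy('127.0.0.1:8087')   # installs an opener routing http:// and https:// through the proxy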

@@ -615,8 +728,18 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge
    else:
        download(url, output_dir = output_dir, merge = merge, info_only = info_only)

def get_version():
    try:
        import subprocess
        real_dir = os.path.dirname(os.path.realpath(__file__))
        git_hash = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], cwd=real_dir, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout.read().decode('utf-8').strip()
        assert git_hash
        return '%s-%s' % (__version__, git_hash)
    except:
        return __version__

def script_main(script_name, download, download_playlist = None):
    version = 'You-Get %s, a video downloader.' % __version__
    version = 'You-Get %s, a video downloader.' % get_version()
    help = 'Usage: %s [OPTION]... [URL]...\n' % script_name
    help += '''\nStartup options:
        -V | --version    Display the version and exit.

src/you_get/downloader/__init__.py

@@ -1,6 +1,7 @@
#!/usr/bin/env python

from .acfun import *
from .alive import *
from .baidu import *
from .bilibili import *
from .blip import *

@@ -8,13 +9,15 @@ from .cntv import *
from .coursera import *
from .dailymotion import *
from .douban import *
from .ehow import *
from .facebook import *
from .fivesing import *
from .freesound import *
from .google import *
from .ifeng import *
from .instagram import *
from .iqiyi import *
from .joy import *
from .jpopsuki import *
from .ku6 import *
from .miomio import *
from .mixcloud import *

@@ -36,3 +39,7 @@ from .xiami import *
from .yinyuetai import *
from .youku import *
from .youtube import *
from .ted import *
from .khan import *

from .__main__ import *

src/you_get/downloader/__main__.py

@@ -1,9 +1,8 @@
#!/usr/bin/env python

__all__ = ['main', 'any_download', 'any_download_playlist']

from .downloader import *
from .common import *
from ..downloader import *
from ..common import *

def url_to_module(url):
    site = r1(r'http://([^/]+)/', url)

@@ -20,6 +19,7 @@ def url_to_module(url):
    downloads = {
        '163': netease,
        '56': w56,
        '5sing': fivesing,
        'acfun': acfun,
        'baidu': baidu,
        'bilibili': bilibili,

@@ -28,14 +28,16 @@ def url_to_module(url):
        'coursera': coursera,
        'dailymotion': dailymotion,
        'douban': douban,
        'ehow': ehow,
        'facebook': facebook,
        'freesound': freesound,
        'google': google,
        'iask': sina,
        'ifeng': ifeng,
        'in': alive,
        'instagram': instagram,
        'iqiyi': iqiyi,
        'joy': joy,
        'jpopsuki': jpopsuki,
        'kankanews': bilibili,
        'ku6': ku6,
        'miomio': miomio,

@@ -48,6 +50,7 @@ def url_to_module(url):
        'sohu': sohu,
        'songtaste':songtaste,
        'soundcloud': soundcloud,
        'ted': ted,
        'tudou': tudou,
        'tumblr': tumblr,
        'vid48': vid48,

@@ -58,6 +61,7 @@ def url_to_module(url):
        'youku': youku,
        'youtu': youtube,
        'youtube': youtube,
        'khanacademy': khan,
        #TODO
    }
    if k in downloads:
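
A dispatch sketch (assuming, as the name suggests, that url_to_module() returns the matching extractor module; the key-extraction code is abbreviated in the hunk, and the URL below is illustrative):

    m = url_to_module('http://v.youku.com/v_show/id_XNTc0NzYyNDQ0.html')  # key 'youku' -> youku module
    m.download('http://v.youku.com/v_show/id_XNTc0NzYyNDQ0.html', output_dir='.', merge=True)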

src/you_get/downloader/acfun.py

@@ -5,7 +5,7 @@ __all__ = ['acfun_download']

from ..common import *

from .qq import qq_download_by_id
from .sina import sina_download_by_id
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_iid
from .youku import youku_download_by_id

@@ -16,11 +16,11 @@ def get_srt_json(id):
    return get_html(url)

def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    info = json.loads(get_html('http://www.acfun.tv/api/getVideoByID.aspx?vid=' + id))
    info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id))
    t = info['vtype']
    vid = info['vid']
    if t == 'sina':
        sina_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
        sina_download_by_vid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif t == 'youku':
        youku_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif t == 'tudou':

@@ -37,7 +37,7 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_
        x.write(cmt)

def acfun_download(url, output_dir = '.', merge = True, info_only = False):
    assert re.match(r'http://www.acfun.tv/v/ac(\d+)', url)
    assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url)
    html = get_html(url)

    title = r1(r'<h1 id="title-article" class="title"[^<>]*>([^<>]+)<', html)

@@ -49,7 +49,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r"\[Video\](\d+)\[/Video\]", html) or r1(r"\[video\](\d+)\[/video\]", html)
    if not id:
        id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
        sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
    else:
        acfun_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)

src/you_get/downloader/alive.py (new file) | 21

@@ -0,0 +1,21 @@
#!/usr/bin/env python

__all__ = ['alive_download']

from ..common import *

def alive_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)

    title = r1(r'<meta property="og:title" content="([^"]+)"', html)

    url = r1(r'file: "(http://alive[^"]+)"', html)
    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "Alive.in.th"
download = alive_download
download_playlist = playlist_not_supported('alive')

src/you_get/downloader/baidu.py

@@ -68,12 +68,25 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False)
        track_nr += 1

def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
    if re.match(r'http://pan.baidu.com', url):
        html = get_html(url)

    if re.match(r'http://music.baidu.com/album/\d+', url):
        title = r1(r'server_filename="([^"]+)"', html)
        if len(title.split('.')) > 1:
            title = ".".join(title.split('.')[:-1])

        real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
        type, ext, size = url_info(real_url, faker = True)

        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls([real_url], title, ext, size, output_dir, merge = merge)

    elif re.match(r'http://music.baidu.com/album/\d+', url):
        id = r1(r'http://music.baidu.com/album/(\d+)', url)
        baidu_download_album(id, output_dir, merge, info_only)

    if re.match('http://music.baidu.com/song/\d+', url):
    elif re.match('http://music.baidu.com/song/\d+', url):
        id = r1(r'http://music.baidu.com/song/(\d+)', url)
        baidu_download_song(id, output_dir, merge, info_only)

src/you_get/downloader/bilibili.py

@@ -4,7 +4,7 @@ __all__ = ['bilibili_download']

from ..common import *

from .sina import sina_download_by_id
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .youku import youku_download_by_id

@@ -64,7 +64,7 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_onl
    elif re.search(r'/mp4/', urls[0]):
        type = 'mp4'
    else:
        raise NotImplementedError(urls[0])
        type = 'flv'

    size = 0
    for url in urls:

@@ -83,7 +83,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False):
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    t, id = flashvars.split('=', 1)
    id = id.split('&')[0]

src/you_get/downloader/dailymotion.py

@@ -5,16 +5,22 @@ __all__ = ['dailymotion_download']

from ..common import *

def dailymotion_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)
    html = parse.unquote(html).replace('\/', '/')
    """Downloads Dailymotion videos by URL.
    """

    title = r1(r'meta property="og:title" content="([^"]+)"', html)
    title = escape_file_path(title)
    id = match1(url, r'/video/([^\?]+)')
    embed_url = 'http://www.dailymotion.com/embed/video/%s' % id
    html = get_content(embed_url)

    for quality in ['hd720URL', 'hqURL', 'sdURL']:
        real_url = r1(r',\"' + quality + '\"\:\"([^\"]+?)\",', html)
    info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))

    title = info['title']

    for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
        real_url = info[quality]
        if real_url:
            break

    type, ext, size = url_info(real_url)

    print_info(site_info, title, type, size)
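
The rewritten extractor reads the player's info object straight out of the embed page; a sketch with a made-up fragment:

    html = 'var info = {"title": "Demo clip", "stream_h264_hd_url": "http://cdn.example.com/hd.mp4"},\n'
    info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))
    info['title']                 # -> 'Demo clip'
    info['stream_h264_hd_url']    # -> 'http://cdn.example.com/hd.mp4'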

src/you_get/downloader/ehow.py (new file) | 38

@@ -0,0 +1,38 @@
#!/usr/bin/env python

__all__ = ['ehow_download']

from ..common import *

def ehow_download(url, output_dir = '.', merge = True, info_only = False):

    assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported"

    html = get_html(url)
    contentid = r1(r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
    vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
    assert vid

    xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)

    from xml.dom.minidom import parseString
    doc = parseString(xml)
    tab = doc.getElementsByTagName('related')[0].firstChild

    for video in tab.childNodes:
        if re.search(contentid, video.attributes['link'].value):
            url = video.attributes['flv'].value
            break

    title = video.attributes['title'].value
    assert title

    type, ext, size = url_info(url)
    print_info(site_info, title, type, size)

    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "ehow.com"
download = ehow_download
download_playlist = playlist_not_supported('ehow')

src/you_get/downloader/fivesing.py (new file) | 18

@@ -0,0 +1,18 @@
#!/usr/bin/env python

__all__ = ['fivesing_download']

from ..common import *

def fivesing_download(url, output_dir=".", merge=True, info_only=False):
    html = get_html(url)
    title = r1(r'var SongName = "(.*)";', html)
    url = r1(r'file: "(\S*)"', html)
    songtype, ext, size = url_info(url)
    print_info(site_info, title, songtype, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir)

site_info = "5sing.com"
download = fivesing_download
download_playlist = playlist_not_supported("5sing")

src/you_get/downloader/google.py

@@ -6,6 +6,40 @@ from ..common import *

import re

# YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
youtube_codecs = [
    {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 102, 'container': '', 'video_resolution': '', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': '', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': '', 'audio_bitrate': ''},
    {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
    {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'AVC', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 85, 'container': 'MP4', 'video_resolution': '520p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
    {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 43, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 34, 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 18, 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 6, 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
    {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''},
    {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
    {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.17', 'audio_encoding': 'AAC', 'audio_bitrate': '38'},
    {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
]
fmt_level = dict(
    zip(
        [str(codec['itag'])
         for codec in
         youtube_codecs],
        range(len(youtube_codecs))))
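
# Illustrative use of fmt_level (not in the diff): among scraped
# (itag, url) pairs, the best stream is the one whose itag maps to the
# lowest level number, exactly as google_download() does below:
#   pairs = [('43', webm_360p_url), ('22', mp4_720p_url)]
#   sorted(pairs, key=lambda x: fmt_level[x[0]])[0][1]   # -> mp4_720p_url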

def google_download(url, output_dir = '.', merge = True, info_only = False):
    # Percent-encoding Unicode URL
    url = parse.quote(url, safe = ':/+%')

@@ -14,25 +48,22 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):

    if service == 'plus': # Google Plus

        if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url):
            oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url)
            pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url)

        elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url):
            oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url)
            pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url)

        if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
            html = get_html(url)
            url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html)
            title = r1(r'<title>([^<\n]+)', html)
        else:
            html = get_html(url)
            oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html)
            pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html)

            url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
            title = None

        html = get_html(url)
        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
        real_urls = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
        real_url = unicodize(sorted(real_urls, key = lambda x : fmt_level[x[0]])[0][1])

        if title is None:
            post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html)
            post_html = get_html(post_url)
            title = r1(r'<title>([^<\n]+)', post_html)

        title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html)
        if title is None:
            response = request.urlopen(request.Request(real_url))
            if response.headers['content-disposition']:

src/you_get/downloader/instagram.py (new file) | 22

@@ -0,0 +1,22 @@
#!/usr/bin/env python

__all__ = ['instagram_download']

from ..common import *

def instagram_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)

    id = r1(r'instagram.com/p/([^/]+)/', html)
    description = r1(r'<meta property="og:description" content="([^"]*)"', html)
    title = description + " [" + id + "]"
    url = r1(r'<meta property="og:video" content="([^"]*)"', html)
    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "Instagram.com"
download = instagram_download
download_playlist = playlist_not_supported('instagram')

src/you_get/downloader/iqiyi.py

@@ -6,13 +6,8 @@ from ..common import *

def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)
    #title = r1(r'title\s*:\s*"([^"]+)"', html)
    #title = unescape_html(title).decode('utf-8')
    #videoId = r1(r'videoId\s*:\s*"([^"]+)"', html)
    #pid = r1(r'pid\s*:\s*"([^"]+)"', html)
    #ptype = r1(r'ptype\s*:\s*"([^"]+)"', html)
    #info_url = 'http://cache.video.qiyi.com/v/%s/%s/%s/' % (videoId, pid, ptype)
    videoId = r1(r'''["']videoId["'][:=]["']([^"']+)["']''', html)

    videoId = r1(r'data-player-videoid="([^"]+)"', html)
    assert videoId

    info_url = 'http://cache.video.qiyi.com/v/%s' % videoId

src/you_get/downloader/jpopsuki.py (deleted file)

@@ -1,23 +0,0 @@
#!/usr/bin/env python

__all__ = ['jpopsuki_download']

from ..common import *

def jpopsuki_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)

    title = r1(r'<meta name="title" content="([^"]*)"', html)
    if title.endswith(' - JPopsuki TV'):
        title = title[:-14]

    url = "http://jpopsuki.tv%s" % r1(r'<source src="([^"]*)"', html)
    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "JPopsuki.tv"
download = jpopsuki_download
download_playlist = playlist_not_supported('jpopsuki')

src/you_get/downloader/khan.py (new executable file) | 15

@@ -0,0 +1,15 @@
#!/usr/bin/env python

__all__ = ['khan_download']

from ..common import *
from .youtube import youtube_download_by_id

def khan_download(url, output_dir = '.', merge = True, info_only = False):
    page = get_html(url)
    id = page[page.find('src="https://www.youtube.com/embed/') + len('src="https://www.youtube.com/embed/') :page.find('?enablejsapi=1&wmode=transparent&modestbranding=1&rel=0&fs=1&showinfo=0')]
    youtube_download_by_id(id, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "khanacademy.org"
download = khan_download
download_playlist = playlist_not_supported('khan')

src/you_get/downloader/netease.py

@@ -7,10 +7,13 @@ from ..common import *
def netease_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_decoded_html(url)

    src = r1(r'<source src="([^"]+)"', html)
    title = r1('movieDescription=\'([^\']+)\'', html)
    title = r1('movieDescription=\'([^\']+)\'', html) or r1('<title>(.+)</title>', html)
    if title[0] == ' ':
        title = title[1:]

    if title:
    src = r1(r'<source src="([^"]+)"', html) or r1(r'<source type="[^"]+" src="([^"]+)"', html)

    if src:
        sd_url = r1(r'(.+)-mobile.mp4', src) + ".flv"
        _, _, sd_size = url_info(sd_url)

@@ -24,10 +27,7 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False):
        ext = 'flv'

    else:
        title = r1('<title>(.+)</title>', html)
        if title[0] == ' ':
            title = title[1:]
        url = r1(r'(.+)-list.m3u8', src) + ".mp4"
        url = r1(r'["\'](.+)-list.m3u8["\']', html) + ".mp4"
        _, _, size = url_info(url)
        ext = 'mp4'

src/you_get/downloader/nicovideo.py

@@ -23,7 +23,7 @@ def nicovideo_download(url, output_dir = '.', merge = True, info_only = False):
    nicovideo_login(user, password)

    html = get_html(url) # necessary!
    title = unicodize(r1(r'title:\s*\'(.*)\',', html))
    title = unicodize(r1(r'<span class="videoHeaderTitle">([^<]+)</span>', html))

    api_html = get_html('http://www.nicovideo.jp/api/getflv?v=%s' % url.split('/')[-1])
    real_url = parse.unquote(r1(r'url=([^&]+)&', api_html))

src/you_get/downloader/pptv.py

@@ -9,18 +9,14 @@ import urllib
import hashlib

def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    xml = get_html('http://web-play.pptv.com/webplay3-151-%s.xml' % id)
    xml = get_html('http://web-play.pptv.com/webplay3-0-%s.xml?type=web.fpp' % id)
    host = r1(r'<sh>([^<>]+)</sh>', xml)
    port = 8080
    st = r1(r'<st>([^<>]+)</st>', xml).encode('utf-8')
    key = hashlib.md5(st).hexdigest() # FIXME: incorrect key
    rids = re.findall(r'rid="([^"]+)"', xml)
    key = r1(r'<key expire=[^<>]+>([^<>]+)</key>', xml)
    rid = r1(r'rid="([^"]+)"', xml)
    title = r1(r'nm="([^"]+)"', xml)
    pieces = re.findall('<sgm no="(\d+)".*fs="(\d+)"', xml)
    numbers, fs = zip(*pieces)
    urls = ['http://%s:%s/%s/%s?key=%s' % (host, port, i, rid, key) for i in numbers]
    urls = ['http://pptv.vod.lxdns.com/%s/%s?key=%s' % (i, rid, key) for i in numbers]
    urls = ['http://%s/%s/%s?k=%s' % (host, i, rid, key) for i in numbers]
    total_size = sum(map(int, fs))
    assert rid.endswith('.mp4')

src/you_get/downloader/sina.py

@@ -1,20 +1,22 @@
#!/usr/bin/env python

__all__ = ['sina_download', 'sina_download_by_id']
__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']

from ..common import *

import re

def video_info(id):
    xml = get_decoded_html('http://v.iask.com/v_play.php?vid=%s' % id)
    xml = get_content('http://v.iask.com/v_play.php?vid=%s' % id, decoded=True)
    urls = re.findall(r'<url>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</url>', xml)
    name = r1(r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>', xml)
    vstr = r1(r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>', xml)
    name = match1(xml, r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>')
    vstr = match1(xml, r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>')
    return urls, name, vstr

def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    urls, name, vstr = video_info(id)
def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a Sina video by its unique vid.
    http://video.sina.com.cn/
    """

    urls, name, vstr = video_info(vid)
    title = title or name
    assert title
    size = 0

@@ -26,11 +28,36 @@ def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
    if not info_only:
        download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)

def sina_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r'[^_]vid\s*:\s*\'([^\']+)\',', get_html(url)).split('|')[-1]
    assert id
def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a Sina video by its unique vkey.
    http://video.sina.com/
    """

    sina_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
    url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey
    type, ext, size = url_info(url)

    print_info(site_info, title, 'flv', size)
    if not info_only:
        download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)

def sina_download(url, output_dir='.', merge=True, info_only=False):
    """Downloads Sina videos by URL.
    """

    vid = match1(url, r'vid=(\d+)')
    if vid is None:
        video_page = get_content(url)
        vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
        if hd_vid == '0':
            vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|')
            vid = vids[-1]

    if vid:
        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
        title = match1(video_page, r'title\s*:\s*"([^"]+)"')
        sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "Sina.com"
download = sina_download
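
The staged vid lookup in the new sina_download(), as a worked sketch (page values invented):

    page = "hd_vid : '0', vid : '75747512|75747513'"
    hd_vid = match1(page, r"hd_vid\s*:\s*'([^']+)'")             # '0' -> no single HD stream
    vids = match1(page, r"[^\w]vid\s*:\s*'([^']+)'").split('|')  # ['75747512', '75747513']
    vid = vids[-1]                                               # last entry, assumed best quality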

src/you_get/downloader/sohu.py

@@ -8,7 +8,7 @@ import json

def real_url(host, prot, file, new):
    url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
    start, _, host, key, _, _ = get_html(url).split('|')
    start, _, host, key = get_html(url).split('|')[:4]
    return '%s%s?key=%s' % (start[:-1], new, key)

def sohu_download(url, output_dir = '.', merge = True, info_only = False):

src/you_get/downloader/ted.py (new file) | 24

@@ -0,0 +1,24 @@
#!/usr/bin/env python

__all__ = ['ted_download']

from ..common import *

def ted_download(url, output_dir = '.', merge = True, info_only = False):
    page = get_html(url).split("\n")
    for line in page:
        if line.find("<title>") > -1:
            title = line.replace("<title>", "").replace("</title>", "").replace("\t", "")
            title = title[:title.find(' | ')]
        if line.find("no-flash-video-download") > -1:
            url = line.replace('<a id="no-flash-video-download" href="', "").replace(" ", "").replace("\t", "").replace(".mp4", "-480p-en.mp4")
            url = url[:url.find('"')]
            type, ext, size = url_info(url)
            print_info(site_info, title, type, size)
            if not info_only:
                download_urls([url], title, ext, size, output_dir, merge=merge)
            break

site_info = "ted.com"
download = ted_download
download_playlist = playlist_not_supported('ted')

src/you_get/downloader/tudou.py

@@ -5,26 +5,31 @@ __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id',
from ..common import *

def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
    xml = get_html('http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99')
    data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
    vids = []
    for k in data:
        if len(data[k]) == 1:
            vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]})

    temp = max(vids, key=lambda x:x["size"])
    vid, size = temp["k"], temp["size"]

    xml = get_html('http://ct.v2.tudou.com/f?id=%s' % vid)
    from xml.dom.minidom import parseString
    doc = parseString(xml)
    title = title or doc.firstChild.getAttribute('tt') or doc.firstChild.getAttribute('title')
    urls = [(int(n.getAttribute('brt')), n.firstChild.nodeValue.strip()) for n in doc.getElementsByTagName('f')]
    url = [n.firstChild.nodeValue.strip() for n in doc.getElementsByTagName('f')][0]

    url = max(urls, key = lambda x:x[0])[1]
    assert 'f4v' in url
    ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', url)

    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    print_info(site_info, title, ext, size)
    if not info_only:
        #url_save(url, filepath, bar):
        download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
        download_urls([url], title, ext, size, output_dir = output_dir, merge = merge)

def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
    html = get_html('http://www.tudou.com/programs/view/%s/' % id)

    iid = r1(r'iid\s*[:=]\s*(\S+)', html)
    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
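
# Shape of the getItemSegs.action response, illustrated with made-up data:
#   {"1": [{"k": 186099891, "size": 12345678}],
#    "3": [{"k": 186099892, "size": 45678901}]}
# Only single-segment streams are kept, and the largest wins:
#   max(vids, key=lambda x: x["size"])   # -> {"k": 186099892, "size": 45678901}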

def tudou_download(url, output_dir = '.', merge = True, info_only = False):

src/you_get/downloader/tumblr.py

@@ -10,7 +10,9 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)
    html = parse.unquote(html).replace('\/', '/')

    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html))
    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
                          r1(r'<meta property="og:description" content="([^"]*)" />', html) or
                          r1(r'<title>([^<\n]*)', html)).replace('\n', '')
    real_url = r1(r'source src=\\x22([^\\]+)\\', html)
    if not real_url:
        real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'

src/you_get/downloader/xiami.py

@@ -55,11 +55,14 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
    if not ext:
        ext = 'mp3'

    print_info(site_info, song_title, type, size)
    print_info(site_info, song_title, ext, size)
    if not info_only:
        file_name = "%s - %s - %s" % (song_title, album_name, artist)
        download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except:
            pass

def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = False):
    html = get_html('http://www.xiami.com/song/showcollect/id/' + cid, faker = True)

@@ -84,7 +87,10 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only =
    if not info_only:
        file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name)
        download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except:
            pass

    track_nr += 1

@@ -112,7 +118,10 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
    if not info_only:
        file_name = "%02d.%s" % (track_nr, song_title)
        download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except:
            pass
    if not pic_exist:
        xiami_download_pic(pic_url, 'cover', output_dir)
        pic_exist = True

@@ -132,6 +141,10 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
    id = r1(r'http://www.xiami.com/song/(\d+)', url)
    xiami_download_song(id, output_dir, merge, info_only)

    if re.match('http://www.xiami.com/song/detail/id/\d+', url):
        id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url)
        xiami_download_song(id, output_dir, merge, info_only)

site_info = "Xiami.com"
download = xiami_download
download_playlist = playlist_not_supported("xiami")

src/you_get/downloader/yinyuetai.py

@@ -20,10 +20,10 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i
    download_urls([url], title, ext, size, output_dir, merge = merge)

def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url)
    id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url)
    assert id
    html = get_html(url, 'utf-8')
    title = r1(r'<meta property="og:title" content="([^"]+)"/>', html)
    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html)
    assert title
    title = parse.unquote(title)
    title = escape_file_path(title)

src/you_get/downloader/youku.py

@@ -25,7 +25,7 @@ def find_video_id_from_url(url):
    return r1_of(patterns, url)

def find_video_id_from_show_page(url):
    return re.search(r'<div class="btnplay">.*href="([^"]+)"', get_html(url)).group(1)
    return re.search(r'<a class="btnShow btnplay.*href="([^"]+)"', get_html(url)).group(1)

def youku_url(url):
    id = find_video_id_from_url(url)

@@ -61,7 +61,7 @@ def parse_video_title(url, page):

def parse_playlist_title(url, page):
    if re.search(r'v_playlist', url):
        # if we are playing a viedo from play list, the meta title might be incorrect
        # if we are playing a video from play list, the meta title might be incorrect
        title = re.search(r'<title>([^<>]*)</title>', page).group(1)
    else:
        title = re.search(r'<meta name="title" content="([^"]*)"', page).group(1)

@@ -80,7 +80,7 @@ def parse_page(url):
    return id2, title

def get_info(videoId2):
    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2 + '/timezone/+08/version/5/source/out/Sc/2'))

def find_video(info, stream_type = None):
    #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5)

@@ -120,28 +120,16 @@ def find_video(info, stream_type = None):
def file_type_of_url(url):
    return str(re.search(r'/st/([^/]+)/', url).group(1))

def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
    info = get_info(id2)
def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
    info = get_info(id)
    urls, sizes = zip(*find_video(info, stream_type))
    ext = file_type_of_url(urls[0])
    total_size = sum(sizes)

    urls = url_locations(urls) # Use real (redirected) URLs for resuming of downloads

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(urls, title, ext, total_size, output_dir, merge = merge)

def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
    if not youku_url(url):
        youku_download_playlist(url, output_dir, merge, info_only)
        return

    id2, title = parse_page(url)
    title = title.replace('?', '-')

    youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only)

def parse_playlist_videos(html):
    return re.findall(r'id="A_(\w+)"', html)

@@ -175,9 +163,9 @@ def parse_vplaylist(url):
    n = int(re.search(r'<span class="num">(\d+)</span>', get_html(url)).group(1))
    return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)]

def youku_download_playlist(url, output_dir = '.', merge = True, info_only = False):
    if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
        url = find_video_id_from_show_page(url)
def youku_download_playlist(url, output_dir='.', merge=True, info_only=False):
    """Downloads a Youku playlist.
    """

    if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url):
        ids = parse_vplaylist(url)

@@ -185,21 +173,36 @@ def youku_download_playlist(url, output_dir = '.', merge = True, info_only = Fal
        ids = parse_vplaylist(url)
    elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url):
        ids = parse_vplaylist(url)
    else:
    elif re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
        url = find_video_id_from_show_page(url)
        assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
        ids = parse_playlist(url)
    else:
        ids = []
    assert ids != []

    title = parse_playlist_title(url, get_html(url))
    title = title.replace('?', '-')
    title = filenameable(title)
    output_dir = os.path.join(output_dir, title)

    for i, id in enumerate(ids):
        try:
            print('Processing %s of %s videos...' % (i + 1, len(ids)))
            youku_download(id, output_dir, merge = merge, info_only = info_only)
            try:
                id, title = parse_page(youku_url(id))
                youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
            except:
                continue

def youku_download(url, output_dir='.', merge=True, info_only=False):
    """Downloads Youku videos by URL.
    """

    try:
        youku_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
    except:
        id, title = parse_page(url)
        youku_download_by_id(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "Youku.com"
download = youku_download
download_playlist = youku_download_playlist

src/you_get/downloader/youtube.py

@@ -6,7 +6,7 @@ from ..common import *

# YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
youtube_codecs = [
yt_codecs = [
    {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},

@@ -32,102 +32,70 @@ youtube_codecs = [
    {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
]

def parse_video_info(raw_info):
    """Parser for YouTube's get_video_info data.
    Returns a dict, where 'url_encoded_fmt_stream_map' maps to a sorted list.
def decipher(js, s):
    def tr_js(code):
        code = re.sub(r'function', r'def', code)
        code = re.sub(r'\{', r':\n\t', code)
        code = re.sub(r'\}', r'\n', code)
        code = re.sub(r'var\s+', r'', code)
        code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
        code = re.sub(r'(\w+).length', r'len(\1)', code)
        code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
        code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
        code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
        return code

    f1 = match1(js, r'g.sig\|\|(\w+)\(g.s\)')
    f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1)
    code = tr_js(f1def)
    f2 = match1(f1def, r'(\w+)\(\w+,\d+\)')
    if f2 is not None:
        f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2)
        code = code + 'global %s\n' % f2 + tr_js(f2def)

    code = code + 'sig=%s(s)' % f1
    exec(code, globals(), locals())
    return locals()['sig']
||||
|
||||
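decipher() works by locating the player's signature scrambler in the JS, transliterating it into Python with the regex chain in tr_js, and exec-ing the result. The chain covers the handful of string operations those scramblers used at the time. A self-contained toy run, with a made-up scrambler rather than a real player function:

    import re

    # A fabricated JS scrambler in the style tr_js expects (not a real player).
    js_sample = 'function mo(a){a=a.split("");a=a.reverse();a=a.slice(2);return a.join("")}'

    code = js_sample
    code = re.sub(r'function', r'def', code)
    code = re.sub(r'\{', r':\n\t', code)
    code = re.sub(r'\}', r'\n', code)
    code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
    code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
    code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
    code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)

    exec(code)  # defines mo(); the semicolon-joined body happens to be valid Python
    assert mo('abcdefg') == 'edcba'  # reverse, drop two characters, rejoin
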
def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
"""Downloads a YouTube video by its unique id.
"""

# Percent-encoding reserved characters, used as separators.
sepr = {
'&': '%26',
',': '%2C',
'=': '%3D',
raw_video_info = get_content('http://www.youtube.com/get_video_info?video_id=%s' % id)
video_info = parse.parse_qs(raw_video_info)

if video_info['status'] == ['ok'] and ('use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']):
title = parse.unquote_plus(video_info['title'][0])
stream_list = parse.parse_qs(raw_video_info)['url_encoded_fmt_stream_map'][0].split(',')

else:
# Parse video page when video_info is not usable.
video_page = get_content('http://www.youtube.com/watch?v=%s' % id)
ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+);'))

title = ytplayer_config['args']['title']
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')

html5player = ytplayer_config['assets']['js']

streams = {
parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream)
for stream in stream_list
}

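parse_qs wraps every value in a list, which is why the lookups above all end in [0]. A quick round-trip on a fabricated stream-map entry:

    from urllib import parse

    # Fabricated url_encoded_fmt_stream_map entry; real ones carry more keys.
    stream = 'itag=22&url=http%3A%2F%2Fexample.com%2Fvideoplayback&sig=ABCD'
    d = parse.parse_qs(stream)
    assert d['itag'] == ['22']  # every value arrives list-wrapped, hence the [0]s
    assert d['url'] == ['http://example.com/videoplayback']  # parse_qs percent-decodes too
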
# fmt_level = {'itag': level, ...}
# itag of a higher quality maps to a lower level number.
# The highest quality has level number 0.
fmt_level = dict(
zip(
[str(codec['itag'])
for codec in
youtube_codecs],
range(len(youtube_codecs))))

# {key1: value1, key2: value2, ...,
# 'url_encoded_fmt_stream_map': [{'itag': '38', ...}, ...]
# }
return dict(
[(lambda metadata:
['url_encoded_fmt_stream_map', (
lambda stream_map:
sorted(
[dict(
[subitem.split(sepr['='])
for subitem in
item.split(sepr['&'])])
for item in
stream_map.split(sepr[','])],
key =
lambda stream:
fmt_level[stream['itag']]))
(metadata[1])]
if metadata[0] == 'url_encoded_fmt_stream_map'
else metadata)
(item.split('='))
for item in
raw_info.split('&')])

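The removed fmt_level table had one job: rank each itag by its position in the codec table so the stream list could be sorted best-first. The new code drops the sort in favor of the linear scan over yt_codecs shown later. What the table computed, in isolation (itags abbreviated):

    # dict(zip(itags, range(...))) maps each itag to its rank; 0 means best.
    itags = ['38', '46', '37']
    fmt_level = dict(zip(itags, range(len(itags))))
    assert fmt_level == {'38': 0, '46': 1, '37': 2}
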
def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):

raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8')

video_info = parse_video_info(raw_info)

if video_info['status'] == 'ok': # use get_video_info data

title = parse.unquote(video_info['title'].replace('+', ' '))

signature = video_info['url_encoded_fmt_stream_map'][0]['sig']
url = parse.unquote(parse.unquote(video_info['url_encoded_fmt_stream_map'][0]['url'])) + "&signature=%s" % signature

else: # parse video page when "embedding disabled by request"

import json
html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
html = unescape_html(html)
yt_player_config = json.loads(r1(r'ytplayer.config = ([^\n]+);', html))
title = yt_player_config['args']['title']
title = unicodize(title)
title = parse.unquote(title)
title = escape_file_path(title)

for itag in [
'38',
'46', '37',
'102', '45', '22',
'84',
'120',
'85',
'44', '35',
'101', '100', '43', '34', '82', '18',
'6', '83', '13', '5', '36', '17',
]:
fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html)
if fmt:
url = r1(r'url=([^\\]+)', fmt)
url = unicodize(url)
url = parse.unquote(url)
sig = r1(r'sig=([^\\]+)', fmt)
url = url + '&signature=' + sig
for codec in yt_codecs:
itag = str(codec['itag'])
if itag in streams:
download_stream = streams[itag]
break
try:
url
except NameError:
url = r1(r'ytdns.ping\("([^"]+)"[^;]*;</script>', html)
url = unicodize(url)
url = re.sub(r'\\/', '/', url)
url = re.sub(r'generate_204', 'videoplayback', url)

url = download_stream['url'][0]
if 'sig' in download_stream:
sig = download_stream['sig'][0]
else:
js = get_content(html5player)
sig = decipher(js, download_stream['s'][0])
url = '%s&signature=%s' % (url, sig)

type, ext, size = url_info(url)

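The two signature branches above are the crux of the new download path: a ready-made 'sig' field is appended verbatim, while a scrambled 's' field must first be run through decipher() against the html5player JS. Restated as a standalone sketch ('decipher_fn' stands in for the bound decipher call; the stream dicts are fabricated):

    # Sketch of the branch logic only; not the module's actual function.
    def final_url(download_stream, decipher_fn):
        url = download_stream['url'][0]
        if 'sig' in download_stream:   # server handed us a usable signature
            sig = download_stream['sig'][0]
        else:                          # scrambled: undo the player's shuffle
            sig = decipher_fn(download_stream['s'][0])
        return '%s&signature=%s' % (url, sig)

    assert final_url({'url': ['U'], 'sig': ['S']}, None) == 'U&signature=S'
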
@@ -135,13 +103,14 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
if not info_only:
download_urls([url], title, ext, size, output_dir, merge = merge)

def youtube_download(url, output_dir = '.', merge = True, info_only = False):
id = r1(r'youtu.be/(.*)', url)
if not id:
id = parse.parse_qs(parse.urlparse(url).query)['v'][0]
def youtube_download(url, output_dir='.', merge=True, info_only=False):
"""Downloads YouTube videos by URL.
"""

id = match1(url, r'youtu.be/([^/]+)') or parse_query_param(url, 'v')
assert id

youtube_download_by_id(id, None, output_dir, merge = merge, info_only = info_only)
youtube_download_by_id(id, title=None, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "YouTube.com"
download = youtube_download
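
Old and new youtube_download accept the same two URL shapes: short youtu.be links and watch URLs with a v= query parameter; the new one-liner folds the fallback into an `or`. Exercised on made-up URLs (video_id is our stand-in for the match1/parse_query_param pair):

    import re
    from urllib import parse

    def video_id(url):
        # youtu.be/<id> first, then the watch page's v= parameter.
        m = re.search(r'youtu.be/([^/]+)', url)
        if m:
            return m.group(1)
        return parse.parse_qs(parse.urlparse(url).query)['v'][0]

    assert video_id('http://youtu.be/1234567890A') == '1234567890A'
    assert video_id('http://www.youtube.com/watch?v=1234567890A') == '1234567890A'
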
@@ -1,6 +1,5 @@
#!/usr/bin/env python

__all__ = ['__version__', '__date__']

__version__ = '0.3.12'
__date__ = '2013-05-19'
__version__ = '0.3.21'
__date__ = '2013-08-17'

@@ -4,7 +4,7 @@
import unittest

from you_get import *
from you_get.__main__ import url_to_module
from you_get.downloader.__main__ import url_to_module

def test_urls(urls):
for url in urls:
@@ -17,11 +17,6 @@ class YouGetTests(unittest.TestCase):
"http://www.freesound.org/people/Corsica_S/sounds/184419/",
])

def test_jpopsuki(self):
test_urls([
#"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
])

def test_mixcloud(self):
test_urls([
"http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/",
tests/test_common.py (new file, 11 lines)
@@ -0,0 +1,11 @@
#!/usr/bin/env python

import unittest

from you_get import *

class TestCommon(unittest.TestCase):

    def test_match1(self):
        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A')
        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be'])
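
The two assertions pin down match1's contract: a single pattern returns the first capture group (or None on no match), several patterns return a list of first-group matches. A minimal sketch consistent with these tests; the real helper lives in the common module and may differ in details:

    import re

    def match1_sketch(text, *patterns):
        # One pattern: first group or None; several: list of first groups.
        if len(patterns) == 1:
            m = re.search(patterns[0], text)
            return m.group(1) if m else None
        return [re.search(p, text).group(1) for p in patterns]

    assert match1_sketch('http://youtu.be/X', r'youtu.be/([^/]+)') == 'X'
    assert match1_sketch('http://youtu.be/X', r'youtu.be/([^/]+)', r'youtu.(\w+)') == ['X', 'be']
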
you-get (9 lines changed)
@@ -1,9 +1,10 @@
#!/usr/bin/env python3

import os, sys
sys.path.insert(0, os.path.join((os.path.dirname(os.path.realpath(__file__))), "src"))
__path__ = os.path.dirname(os.path.realpath(__file__))
__srcdir__ = 'src'
sys.path.insert(1, os.path.join(__path__, __srcdir__))
from you_get.downloader import main

from you_get import *

if __name__ == "__main__":
if __name__ == '__main__':
main()
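
The launcher's job is to make a source checkout runnable without installation: it pushes the repository's src/ directory onto sys.path before importing the package. Inserting at index 1 rather than 0 plausibly keeps the script's own directory at the front of the path (our reading; the commit does not say). A stripped-down equivalent:

    import os, sys

    # Prefer the in-tree package over any system-installed you-get.
    here = os.path.dirname(os.path.realpath(__file__))
    sys.path.insert(1, os.path.join(here, 'src'))

    from you_get.downloader import main  # now resolves against src/ first
    main()
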
@@ -31,6 +31,6 @@
],

"console_scripts": [
"you-get = you_get.__main__:main"
"you-get = you_get.downloader.__main__:main"
]
}
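
For context, the console_scripts entry above is the part of setup.py that puts a you-get executable on PATH; the right-hand side of the mapping is "module.path:callable". A sketch of the surrounding structure, with all other setup() metadata elided and assumed:

    # entry_points as it would appear inside setup(); only the
    # console_scripts key is taken from the hunk above.
    entry_points = {
        "console_scripts": [
            "you-get = you_get.downloader.__main__:main",
        ],
    }
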