Merge pull request #1 from soimort/develop

Project Updating
Yohohaha 2016-11-25 19:16:16 +08:00 committed by GitHub
commit c44a7ec1b9
21 changed files with 675 additions and 166 deletions

View File

@ -37,7 +37,7 @@ Interested? [Install it](#installation) now and [get started by examples](#getti
Are you a Python programmer? Then check out [the source](https://github.com/soimort/you-get) and fork it!
![](http://i.imgur.com/GfthFAz.png)
![](https://i.imgur.com/GfthFAz.png)
## Installation
@ -339,6 +339,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| **Tumblr** | <https://www.tumblr.com/> |✓|✓|✓|
| TED | <http://www.ted.com/> |✓| | |
| SoundCloud | <https://soundcloud.com/> | | |✓|
| SHOWROOM | <https://www.showroom-live.com/> |✓| | |
| Pinterest | <https://www.pinterest.com/> | |✓| |
| MusicPlayOn | <http://en.musicplayon.com/> |✓| | |
| MTV81 | <http://www.mtv81.com/> |✓| | |
@ -372,7 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 爆米花网 | <http://www.baomihua.com/> |✓| | |
| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
| Dilidili | <http://www.dilidili.com/> |✓| | |
| 豆瓣 | <http://www.douban.com/> | | |✓|
| 豆瓣 | <http://www.douban.com/> |✓| |✓|
| 斗鱼 | <http://www.douyutv.com/> |✓| | |
| Panda<br/>熊猫 | <http://www.panda.tv/> |✓| | |
| 凤凰视频 | <http://v.ifeng.com/> |✓| | |
@ -406,6 +407,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 花瓣 | <http://huaban.com/> | |✓| |
| Naver<br/>네이버 | <http://tvcast.naver.com/> |✓| | |
| 芒果TV | <http://www.mgtv.com/> |✓| | |
| 火猫TV | <http://www.huomao.com/> |✓| | |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

View File

@ -27,7 +27,9 @@ SITES = {
'google' : 'google',
'heavy-music' : 'heavymusic',
'huaban' : 'huaban',
'huomao' : 'huomaotv',
'iask' : 'sina',
'icourses' : 'icourses',
'ifeng' : 'ifeng',
'imgur' : 'imgur',
'in' : 'alive',
@ -340,6 +342,45 @@ def get_content(url, headers={}, decoded=True):
return data
def post_content(url, headers={}, post_data={}, decoded=True):
"""Post the content of a URL via sending a HTTP POST request.
Args:
url: A URL.
headers: Request headers used by the client.
decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.
Returns:
The content as a string.
"""
logging.debug('post_content: %s \n post_data: %s' % (url, post_data))
req = request.Request(url, headers=headers)
if cookies:
cookies.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs)
post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
response = request.urlopen(req, data = post_data_enc)
data = response.read()
# Handle HTTP compression for gzip and deflate (zlib)
content_encoding = response.getheader('Content-Encoding')
if content_encoding == 'gzip':
data = ungzip(data)
elif content_encoding == 'deflate':
data = undeflate(data)
# Decode the response body
if decoded:
charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
if charset is not None:
data = data.decode(charset)
else:
data = data.decode('utf-8')
return data
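For illustration, a minimal call sketch of the new helper (the episode id below is made up; the bilibili bangumi change later in this diff calls it the same way):

    import json
    from you_get.common import post_content

    cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
                        post_data={'episode_id': '84851'})
    cid = json.loads(cont)['result']['cid']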
def url_size(url, faker = False, headers = {}):
if faker:
response = request.urlopen(request.Request(url, headers = fake_headers), None)
@ -507,7 +548,11 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
os.remove(filepath) # on Windows rename could fail if destination filepath exists
os.rename(temp_filepath, filepath)
def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore_range=False, refer=None, is_part=False, faker=False, headers={}):
def dyn_update_url(received):
if callable(dyn_callback):
logging.debug('Calling callback %s for new URL from %s' % (dyn_callback.__name__, received))
return dyn_callback(received)
if os.path.exists(filepath):
if not force:
if not is_part:
@ -545,19 +590,26 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
else:
headers = {}
if received:
url = dyn_update_url(received)
if not ignore_range:
headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
headers['Referer'] = refer
response = request.urlopen(request.Request(url, headers = headers), None)
response = request.urlopen(request.Request(url, headers=headers), None)
with open(temp_filepath, open_mode) as output:
this_chunk = received
while True:
buffer = response.read(1024 * 256)
if not buffer:
break
output.write(buffer)
received += len(buffer)
if chunk_size and (received - this_chunk) >= chunk_size:
url = dyn_callback(received)
this_chunk = received
response = request.urlopen(request.Request(url, headers=headers), None)
if bar:
bar.update_received(len(buffer))
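For illustration, a sketch of the new callback contract: given the number of bytes received so far, dyn_callback returns a fresh URL to resume from (the host and refresh scheme below are made up; the icourses extractor added in this diff uses the mechanism the same way):

    from you_get.common import download_urls_chunked

    def refresh_url(received):
        # hypothetical endpoint that re-issues a short-lived media URL
        return 'http://media.example.com/clip.flv?start=%d' % received

    download_urls_chunked([refresh_url(0)], 'clip', 'flv', total_size=150000000,
                          ignore_range=True, chunk_size=15000000,
                          dyn_callback=refresh_url, faker=True)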
@ -806,7 +858,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
print()
def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}):
def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
assert urls
if dry_run:
print('Real URLs:\n%s\n' % urls)
@ -820,7 +872,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
filename = '%s.%s' % (title, ext)
filepath = os.path.join(output_dir, filename)
if total_size and ext in ('ts'):
if total_size:
if not force and os.path.exists(filepath[:-3] + '.mkv'):
print('Skipping %s: file already exists' % filepath[:-3] + '.mkv')
print()
@ -835,7 +887,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
print('Downloading %s ...' % tr(filename))
filepath = os.path.join(output_dir, filename)
parts.append(filepath)
url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers)
url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers, **kwargs)
bar.done()
if not merge:

View File

@ -24,6 +24,7 @@ from .funshion import *
from .google import *
from .heavymusic import *
from .huaban import *
from .icourses import *
from .ifeng import *
from .imgur import *
from .infoq import *

View File

@ -73,14 +73,14 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url)
html = get_html(url)
title = r1(r'<h1 id="txt-title-view">([^<>]+)<', html)
title = r1(r'data-title="([^"]+)"', html)
title = unescape_html(title)
title = escape_file_path(title)
assert title
video = re.search('data-vid="(\d+)"\s*data-scode=""[^<]*title="([^"]+)"', html)
vid = video.group(1)
title = title + ' - ' + video.group(2)
vid = r1('data-vid="(\d+)"', html)
up = r1('data-name="([^"]+)"', html)
title = title + ' - ' + up
acfun_download_by_vid(vid, title,
output_dir=output_dir,
merge=merge,

View File

@ -7,8 +7,10 @@ from ..common import *
from .embed import *
from .universal import *
def baidu_get_song_data(sid):
data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data']
data = json.loads(get_html(
'http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker=True))['data']
if data['xcode'] != '':
# inside china mainland
@ -17,22 +19,28 @@ def baidu_get_song_data(sid):
# outside china mainland
return None
def baidu_get_song_url(data):
return data['songLink']
def baidu_get_song_artist(data):
return data['artistName']
def baidu_get_song_album(data):
return data['albumName']
def baidu_get_song_title(data):
return data['songName']
def baidu_get_song_lyric(data):
lrc = data['lrcLink']
return None if lrc == '' else "http://music.baidu.com%s" % lrc
def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
data = baidu_get_song_data(sid)
if data is not None:
@ -51,7 +59,8 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
type, ext, size = url_info(url, faker=True)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], file_name, ext, size, output_dir, merge=merge, faker=True)
download_urls([url], file_name, ext, size,
output_dir, merge=merge, faker=True)
try:
type, ext, size = url_info(lrc, faker=True)
@ -61,12 +70,14 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
except:
pass
def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)
def baidu_download_album(aid, output_dir='.', merge=True, info_only=False):
html = get_html('http://music.baidu.com/album/%s' % aid, faker=True)
album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)
artist = r1(r'<span class="author_list" title="(.+?)">', html)
output_dir = '%s/%s - %s' % (output_dir, artist, album_name)
ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html).replace('&quot', '').replace(';', '"'))['ids']
ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>',
html).replace('&quot', '').replace(';', '"'))['ids']
track_nr = 1
for id in ids:
song_data = baidu_get_song_data(id)
@ -75,38 +86,29 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False)
song_lrc = baidu_get_song_lyric(song_data)
file_name = '%02d.%s' % (track_nr, song_title)
type, ext, size = url_info(song_url, faker = True)
type, ext, size = url_info(song_url, faker=True)
print_info(site_info, song_title, type, size)
if not info_only:
download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True)
download_urls([song_url], file_name, ext, size,
output_dir, merge=merge, faker=True)
if song_lrc:
type, ext, size = url_info(song_lrc, faker = True)
type, ext, size = url_info(song_lrc, faker=True)
print_info(site_info, song_title, type, size)
if not info_only:
download_urls([song_lrc], file_name, ext, size, output_dir, faker = True)
download_urls([song_lrc], file_name, ext,
size, output_dir, faker=True)
track_nr += 1
def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
if re.match(r'http://imgsrc.baidu.com', url):
universal_download(url, output_dir, merge=merge, info_only=info_only)
return
elif re.match(r'http://pan.baidu.com', url):
html = get_html(url)
def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs):
title = r1(r'server_filename="([^"]+)"', html)
if len(title.split('.')) > 1:
title = ".".join(title.split('.')[:-1])
real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
type, ext, size = url_info(real_url, faker = True)
print_info(site_info, title, ext, size)
if re.match(r'http://pan.baidu.com', url):
real_url, title, ext, size = baidu_pan_download(url)
if not info_only:
download_urls([real_url], title, ext, size, output_dir, merge = merge)
download_urls([real_url], title, ext, size,
output_dir, url, merge=merge, faker=True)
elif re.match(r'http://music.baidu.com/album/\d+', url):
id = r1(r'http://music.baidu.com/album/(\d+)', url)
baidu_download_album(id, output_dir, merge, info_only)
@ -124,17 +126,20 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info
html = get_html(url)
title = r1(r'title:"([^"]+)"', html)
items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
items = re.findall(
r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
for i in set(items)]
# handle albums
kw = r1(r'kw=([^&]+)', html) or r1(r"kw:'([^']+)'", html)
tid = r1(r'tid=(\d+)', html) or r1(r"tid:'([^']+)'", html)
album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid)
album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (
kw, tid)
album_info = json.loads(get_content(album_url))
for i in album_info['data']['pic_list']:
urls.append('http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
urls.append(
'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
ext = 'jpg'
size = float('Inf')
@ -144,6 +149,170 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info
download_urls(urls, title, ext, size,
output_dir=output_dir, merge=False)
def baidu_pan_download(url):
errno_patt = r'errno":([^"]+),'
refer_url = ""
fake_headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'UTF-8,*;q=0.5',
'Accept-Encoding': 'gzip,deflate,sdch',
'Accept-Language': 'en-US,en;q=0.8',
'Host': 'pan.baidu.com',
'Origin': 'http://pan.baidu.com',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36',
'Referer': refer_url
}
if cookies:
print('Use user specified cookies')
else:
print('Generating cookies...')
fake_headers['Cookie'] = baidu_pan_gen_cookies(url)
refer_url = "http://pan.baidu.com"
html = get_content(url, fake_headers, decoded=True)
isprotected = False
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
html)
if sign == None:
if re.findall(r'\baccess-code\b', html):
isprotected = True
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share(
url)
# raise NotImplementedError("Password required!")
if isprotected != True:
raise AssertionError("Share not found or canceled: %s" % url)
if bdstoken == None:
bdstoken = ""
if isprotected != True:
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
html)
request_url = "http://pan.baidu.com/api/sharedownload?sign=%s&timestamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % (
sign, timestamp, bdstoken, appid)
refer_url = url
post_data = {
'encrypt': 0,
'product': 'share',
'uk': uk,
'primaryid': primary_id,
'fid_list': '[' + fs_id + ']'
}
if isprotected == True:
post_data['sekey'] = psk
response_content = post_content(request_url, fake_headers, post_data, True)
errno = match1(response_content, errno_patt)
if errno != "0":
raise AssertionError(
"Server refused to provide download link! (Errno:%s)" % errno)
real_url = r1(r'dlink":"([^"]+)"', response_content).replace('\\/', '/')
title = r1(r'server_filename":"([^"]+)"', response_content)
assert real_url
type, ext, size = url_info(real_url, faker=True)
title_wrapped = json.loads('{"wrapper":"%s"}' % title)
title = title_wrapped['wrapper']
logging.debug(real_url)
print_info(site_info, title, ext, size)
print('Hold on...')
time.sleep(5)
return real_url, title, ext, size
def baidu_pan_parse(html):
sign_patt = r'sign":"([^"]+)"'
timestamp_patt = r'timestamp":([^"]+),'
appid_patt = r'app_id":"([^"]+)"'
bdstoken_patt = r'bdstoken":"([^"]+)"'
fs_id_patt = r'fs_id":([^"]+),'
uk_patt = r'uk":([^"]+),'
errno_patt = r'errno":([^"]+),'
primary_id_patt = r'shareid":([^"]+),'
sign = match1(html, sign_patt)
timestamp = match1(html, timestamp_patt)
appid = match1(html, appid_patt)
bdstoken = match1(html, bdstoken_patt)
fs_id = match1(html, fs_id_patt)
uk = match1(html, uk_patt)
primary_id = match1(html, primary_id_patt)
return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk
def baidu_pan_gen_cookies(url, post_data=None):
from http import cookiejar
cookiejar = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
resp = opener.open('http://pan.baidu.com')
if post_data != None:
resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8'))
return cookjar2hdr(cookiejar)
def baidu_pan_protected_share(url):
print('This share is protected by password!')
inpwd = input('Please provide unlock password: ')
inpwd = inpwd.replace(' ', '').replace('\t', '')
print('Please wait...')
post_pwd = {
'pwd': inpwd,
'vcode': None,
'vstr': None
}
from http import cookiejar
import time
cookiejar = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
resp = opener.open('http://pan.baidu.com')
resp = opener.open(url)
init_url = resp.geturl()
verify_url = 'http://pan.baidu.com/share/verify?%s&t=%s&channel=chunlei&clienttype=0&web=1' % (
init_url.split('?', 1)[1], int(time.time()))
refer_url = init_url
fake_headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'UTF-8,*;q=0.5',
'Accept-Encoding': 'gzip,deflate,sdch',
'Accept-Language': 'en-US,en;q=0.8',
'Host': 'pan.baidu.com',
'Origin': 'http://pan.baidu.com',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36',
'Referer': refer_url
}
opener.addheaders = dict2triplet(fake_headers)
pwd_resp = opener.open(verify_url, bytes(
parse.urlencode(post_pwd), 'utf-8'))
pwd_resp_str = ungzip(pwd_resp.read()).decode('utf-8')
pwd_res = json.loads(pwd_resp_str)
if pwd_res['errno'] != 0:
raise AssertionError(
'Server returned an error: %s (Incorrect password?)' % pwd_res['errno'])
pg_resp = opener.open('http://pan.baidu.com/share/link?%s' %
init_url.split('?', 1)[1])
content = ungzip(pg_resp.read()).decode('utf-8')
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
content)
psk = query_cookiejar(cookiejar, 'BDCLND')
psk = parse.unquote(psk)
fake_headers['Cookie'] = cookjar2hdr(cookiejar)
return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk
def cookjar2hdr(cookiejar):
cookie_str = ''
for i in cookiejar:
cookie_str = cookie_str + i.name + '=' + i.value + ';'
return cookie_str[:-1]
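# e.g. cookies BAIDUID=abc and BDCLND=xyz come back as 'BAIDUID=abc;BDCLND=xyz'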
def query_cookiejar(cookiejar, name):
for i in cookiejar:
if i.name == name:
return i.value
def dict2triplet(dictin):
out_triplet = []
for i in dictin:
out_triplet.append((i, dictin[i]))
return out_triplet
site_info = "Baidu.com"
download = baidu_download
download_playlist = playlist_not_supported("baidu")

View File

@ -119,17 +119,21 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url)
if re.match(r'https?://bangumi\.bilibili\.com/', url):
# quick hack for bangumi URLs
url = r1(r'"([^"]+)" class="v-av-link"', html)
html = get_content(url)
title = r1_of([r'<meta name="title" content="\s*([^<>]{1,999})\s*" />',
r'<h1[^>]*>\s*([^<>]+)\s*</h1>'], html)
if title:
title = unescape_html(title)
title = escape_file_path(title)
if re.match(r'https?://bangumi\.bilibili\.com/', url):
# quick hack for bangumi URLs
episode_id = r1(r'data-current-episode-id="(\d+)"', html)
cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
post_data={'episode_id': episode_id})
cid = json.loads(cont)['result']['cid']
bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only)
else:
flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
assert flashvars

View File

@ -4,6 +4,11 @@ __all__ = ['dailymotion_download']
from ..common import *
def extract_m3u(url):
content = get_content(url)
m3u_url = re.findall(r'http://.*', content)[0]
return match1(m3u_url, r'([^#]+)')
def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
"""Downloads Dailymotion videos by URL.
"""
@ -13,7 +18,7 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False,
title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \
match1(html, r'"title"\s*:\s*"([^"]+)"')
for quality in ['720','480','380','240','auto']:
for quality in ['1080','720','480','380','240','auto']:
try:
real_url = info[quality][0]["url"]
if real_url:
@ -21,11 +26,12 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False,
except KeyError:
pass
type, ext, size = url_info(real_url)
m3u_url = extract_m3u(real_url)
mime, ext, size = 'video/mp4', 'mp4', 0
print_info(site_info, title, type, size)
print_info(site_info, title, mime, size)
if not info_only:
download_urls([real_url], title, ext, size, output_dir, merge = merge)
download_url_ffmpeg(m3u_url, title, ext, output_dir=output_dir, merge=merge)
site_info = "Dailymotion.com"
download = dailymotion_download

View File

@ -7,7 +7,18 @@ from ..common import *
def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url)
if 'subject' in url:
if re.match(r'https?://movie', url):
title = match1(html, 'name="description" content="([^"]+)')
tid = match1(url, 'trailer/(\d+)')
real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
type, ext, size = url_info(real_url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([real_url], title, ext, size, output_dir, merge = merge)
elif 'subject' in url:
titles = re.findall(r'data-title="([^"]*)">', html)
song_id = re.findall(r'<li class="song-item" id="([^"]*)"', html)
song_ssid = re.findall(r'data-ssid="([^"]*)"', html)

View File

@ -25,7 +25,7 @@ youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
"""
http://www.tudou.com/programs/view/html5embed.action?type=0&amp;code=3LS_URGvl54&amp;lcode=&amp;resourceId=0_06_05_99
"""
tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_]+)\&',
tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&',
'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf'
]

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
__all__ = ['huomaotv_download']
from ..common import *
def get_mobile_room_url(room_id):
return 'http://www.huomao.com/mobile/mob_live/%s' % room_id
def get_m3u8_url(stream_id):
return 'http://live-ws.huomaotv.cn/live/%s/playlist.m3u8' % stream_id
def huomaotv_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
room_id_pattern = r'huomao.com/(\d+)'
room_id = match1(url, room_id_pattern)
html = get_content(get_mobile_room_url(room_id))
stream_id_pattern = r'id="html_stream" value="(\w+)"'
stream_id = match1(html, stream_id_pattern)
m3u8_url = get_m3u8_url(stream_id)
title = match1(html, r'<title>([^<]{1,9999})</title>')
print_info(site_info, title, 'm3u8', float('inf'))
if not info_only:
download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge)
site_info = 'huomao.com'
download = huomaotv_download
download_playlist = playlist_not_supported('huomao')
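For illustration, with a made-up room id the URL helpers compose as follows:

    from you_get.extractors.huomaotv import get_mobile_room_url, get_m3u8_url

    get_mobile_room_url('123456')   # -> 'http://www.huomao.com/mobile/mob_live/123456'
    get_m3u8_url('stream_abc')      # -> 'http://live-ws.huomaotv.cn/live/stream_abc/playlist.m3u8'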

View File

@ -0,0 +1,148 @@
#!/usr/bin/env python
from ..common import *
from urllib import parse
import random
from time import sleep
import xml.etree.ElementTree as ET
import datetime
import hashlib
import base64
import logging
from urllib import error
import re
__all__ = ['icourses_download']
def icourses_download(url, merge=False, output_dir='.', **kwargs):
icourses_parser = ICousesExactor(url=url)
real_url = icourses_parser.icourses_cn_url_parser(**kwargs)
title = icourses_parser.title
if real_url is not None:
for tries in range(0, 5):
try:
_, type_, size = url_info(real_url, faker=True)
break
except error.HTTPError:
logging.warning('Failed to fetch the video file! Retrying...')
sleep(random.Random().randint(0, 5))  # avoid getting blocked
real_url = icourses_parser.icourses_cn_url_parser()
title = icourses_parser.title
print_info(site_info, title, type_, size)
if not kwargs['info_only']:
download_urls_chunked([real_url], title, 'flv',
total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True, ignore_range=True, chunk_size=15000000, dyn_callback=icourses_parser.icourses_cn_url_parser)
# Why not use VideoExtractor: this site needs a special download method
class ICousesExactor(object):
def __init__(self, url):
self.url = url
self.title = ''
return
def icourses_playlist_download(self, **kwargs):
html = get_content(self.url)
page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)'
video_js_number = r'changeforvideo\((.*?)\)'
fs_flag = r'<input type="hidden" value=(\w+) id="firstShowFlag">'
page_navi_vars = re.search(pattern=page_type_patt, string=html)
dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format(
page_navi_vars.group(2), page_navi_vars.group(1))
html = get_content(dummy_page)
fs_status = match1(html, fs_flag)
video_list = re.findall(pattern=video_js_number, string=html)
for video in video_list:
video_args = video.replace('\'', '').split(',')
video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format(
video_args[0], video_args[1], fs_status or '1')
sleep(random.Random().randint(0, 5))  # avoid getting blocked
icourses_download(video_url, **kwargs)
def icourses_cn_url_parser(self, received=0, **kwargs):
PLAYER_BASE_VER = '150606-1'
ENCRYPT_MOD_VER = '151020'
ENCRYPT_SALT = '3DAPmXsZ4o'  # It took a really long time to find this...
html = get_content(self.url)
if re.search(pattern=r'showSectionNode\(.*\)', string=html):
logging.warning('Switching to playlist mode!')
return self.icourses_playlist_download(**kwargs)
flashvars_patt = r'var\ flashvars\=((.|\n)*)};'
server_time_patt = r'MPlayer.swf\?v\=(\d+)'
uuid_patt = r'uuid:(\d+)'
other_args_patt = r'other:"(.*)"'
res_url_patt = r'IService:\'([^\']+)'
title_a_patt = r'<div class="con"> <a.*?>(.*?)</a>'
title_b_patt = r'<div class="con"> <a.*?/a>((.|\n)*?)</div>'
title_a = match1(html, title_a_patt).strip()
title_b = match1(html, title_b_patt).strip()
title = title_a + title_b # WIP, FIXME
title = re.sub('( +|\n|\t|\r|\&nbsp\;)', '',
unescape_html(title).replace(' ', ''))
server_time = match1(html, server_time_patt)
flashvars = match1(html, flashvars_patt)
uuid = match1(flashvars, uuid_patt)
other_args = match1(flashvars, other_args_patt)
res_url = match1(flashvars, res_url_patt)
url_parts = {'v': server_time, 'other': other_args,
'uuid': uuid, 'IService': res_url}
req_url = '%s?%s' % (res_url, parse.urlencode(url_parts))
logging.debug('Requesting video resource location...')
xml_resp = get_html(req_url)
xml_obj = ET.fromstring(xml_resp)
logging.debug('The result was {}'.format(xml_obj.get('status')))
if xml_obj.get('status') != 'success':
raise ValueError('Server returned error!')
if received:
play_type = 'seek'
else:
play_type = 'play'
received -= 1
common_args = {'lv': PLAYER_BASE_VER, 'ls': play_type,
'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'),
'start': received + 1}
media_host = xml_obj.find(".//*[@name='host']").text
media_url = media_host + xml_obj.find(".//*[@name='url']").text
# This is what they call the `SSLModule`... but it is really just a kind of
# encryption that does nothing to protect data integrity
if xml_obj.find(".//*[@name='ssl']").text != 'true':
logging.debug('The encryption mode is disabled')
# when the so-called `SSLMode` is not activated, the parameters, `h`
# and `p` can be found in response
arg_h = xml_obj.find(".//*[@name='h']").text
assert arg_h
arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER
url_args = common_args.copy()
url_args.update({'h': arg_h, 'r': arg_r})
final_url = '{}?{}'.format(
media_url, parse.urlencode(url_args))
self.title = title
return final_url
# when the `SSLMode` is activated, we need to receive the timestamp and the
# time offset (?) value from the server
logging.debug('The encryption mode is in effect')
ssl_callback = get_html(
'{}/ssl/ssl.shtml'.format(media_host)).split(',')
ssl_timestamp = int(datetime.datetime.strptime(
ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0]))
sign_this = ENCRYPT_SALT + \
parse.urlparse(media_url).path + str(ssl_timestamp)
arg_h = base64.b64encode(hashlib.md5(
bytes(sign_this, 'utf-8')).digest())
# Post-processing; may be subject to change, so leaving this alone...
arg_h = arg_h.decode('utf-8').strip('=').replace('+',
'-').replace('/', '_')
arg_r = ssl_timestamp
url_args = common_args.copy()
url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER})
final_url = '{}?{}'.format(
media_url, parse.urlencode(url_args))
logging.debug('Crafted URL: {}'.format(final_url))
self.title = title
return final_url
site_info = 'icourses.cn'
download = icourses_download
# download_playlist = icourses_playlist_download
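For reference, a standalone sketch of the hash computed in the `SSLMode` branch above (the media URL and timestamp are made-up values):

    import base64
    import hashlib
    from urllib import parse

    ENCRYPT_SALT = '3DAPmXsZ4o'
    media_url = 'http://media.example.com/some/path/clip.flv'   # hypothetical
    ssl_timestamp = 1480000000                                   # server time + offset

    sign_this = ENCRYPT_SALT + parse.urlparse(media_url).path + str(ssl_timestamp)
    arg_h = base64.b64encode(hashlib.md5(sign_this.encode('utf-8')).digest())
    # make it URL-safe: drop '=' padding, swap '+' and '/' for '-' and '_'
    arg_h = arg_h.decode('utf-8').strip('=').replace('+', '-').replace('/', '_')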

View File

@ -12,11 +12,11 @@ import re
class MGTV(VideoExtractor):
name = "芒果 (MGTV)"
# Last updated: 2015-11-24
# Last updated: 2016-11-13
stream_types = [
{'id': 'hd', 'container': 'flv', 'video_profile': '超清'},
{'id': 'sd', 'container': 'flv', 'video_profile': '高清'},
{'id': 'ld', 'container': 'flv', 'video_profile': '标清'},
{'id': 'hd', 'container': 'ts', 'video_profile': '超清'},
{'id': 'sd', 'container': 'ts', 'video_profile': '高清'},
{'id': 'ld', 'container': 'ts', 'video_profile': '标清'},
]
id_dic = {i['video_profile']:(i['id']) for i in stream_types}
@ -27,7 +27,7 @@ class MGTV(VideoExtractor):
def get_vid_from_url(url):
"""Extracts video ID from URL.
"""
return match1(url, 'http://www.mgtv.com/v/\d/\d+/\w+/(\d+).html')
return match1(url, 'http://www.mgtv.com/b/\d+/(\d+).html')
#----------------------------------------------------------------------
@staticmethod
@ -44,10 +44,15 @@ class MGTV(VideoExtractor):
content = get_content(content['info'])  # get the REAL M3U8 URL; this may need to change later
segment_list = []
segments_size = 0
for i in content.split():
if not i.startswith('#'):  # not the best way; better to use the m3u8 package
segment_list.append(base_url + i)
return segment_list
# use the EXT info for a fast size calculation
elif i.startswith('#EXT-MGTV-File-SIZE:'):
segments_size += int(i[i.rfind(':')+1:])
return m3u_url, segments_size, segment_list
def download_playlist_by_url(self, url, **kwargs):
pass
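For illustration, how the size-from-playlist trick works on a made-up snippet (#EXT-MGTV-File-SIZE is an MGTV-specific tag, not standard HLS):

    sample = """#EXTM3U
    #EXT-MGTV-File-SIZE:1048576
    0.ts
    #EXT-MGTV-File-SIZE:2097152
    1.ts"""

    segments_size = 0
    for i in sample.split():
        if i.startswith('#EXT-MGTV-File-SIZE:'):
            segments_size += int(i[i.rfind(':') + 1:])
    print(segments_size)  # 3145728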
@ -69,14 +74,10 @@ class MGTV(VideoExtractor):
quality_id = self.id_dic[s['video_profile']]
url = stream_available[s['video_profile']]
url = re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum
segment_list_this = self.get_mgtv_real_url(url)
m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)
container_this_stream = ''
size_this_stream = 0
stream_fileid_list = []
for i in segment_list_this:
_, container_this_stream, size_this_seg = url_info(i)
size_this_stream += size_this_seg
stream_fileid_list.append(os.path.basename(i).split('.')[0])
#make pieces
@ -85,10 +86,11 @@ class MGTV(VideoExtractor):
pieces.append({'fileid': i[0], 'segs': i[1],})
self.streams[quality_id] = {
'container': 'flv',
'container': s['container'],
'video_profile': s['video_profile'],
'size': size_this_stream,
'pieces': pieces
'size': m3u8_size,
'pieces': pieces,
'm3u8_url': m3u8_url
}
if not kwargs['info_only']:
@ -107,6 +109,44 @@ class MGTV(VideoExtractor):
# Extract stream with the best quality
stream_id = self.streams_sorted[0]['id']
def download(self, **kwargs):
if 'stream_id' in kwargs and kwargs['stream_id']:
stream_id = kwargs['stream_id']
else:
stream_id = 'null'
# print video info only
if 'info_only' in kwargs and kwargs['info_only']:
if stream_id != 'null':
if 'index' not in kwargs:
self.p(stream_id)
else:
self.p_i(stream_id)
else:
# Display all available streams
if 'index' not in kwargs:
self.p([])
else:
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
self.p_i(stream_id)
# default to use the best quality
if stream_id == 'null':
stream_id = self.streams_sorted[0]['id']
stream_info = self.streams[stream_id]
if not kwargs['info_only']:
if player:
# hand over the m3u8 URL directly, since some video players (e.g. mpv) can handle it themselves
launch_player(player, [stream_info['m3u8_url']])
else:
download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
output_dir=kwargs['output_dir'],
merge=kwargs['merge'],
av=stream_id in self.dash_streams)
site = MGTV()
download = site.download_by_url
download_playlist = site.download_playlist_by_url

View File

@ -55,12 +55,14 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
cover_url = j['result']['coverImgUrl']
download_urls([cover_url], "cover", "jpg", 0, new_dir)
for i in j['result']['tracks']:
netease_song_download(i, output_dir=new_dir, info_only=info_only)
prefix_width = len(str(len(j['result']['tracks'])))
for n, i in enumerate(j['result']['tracks']):
playlist_prefix = '%%.%dd_' % prefix_width % n
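# e.g. a 120-track playlist gives prefix_width == 3, so n == 7 yields '007_'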
netease_song_download(i, output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
try: # download lyrics
assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
except: pass
elif "song" in url:
@ -85,10 +87,10 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)
def netease_lyric_download(song, lyric, output_dir='.', info_only=False):
def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""):
if info_only: return
title = "%s. %s" % (song['position'], song['name'])
title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
filename = '%s.lrc' % get_filename(title)
print('Saving %s ...' % filename, end="", flush=True)
with open(os.path.join(output_dir, filename),
@ -103,8 +105,8 @@ def netease_video_download(vinfo, output_dir='.', info_only=False):
netease_download_common(title, url_best,
output_dir=output_dir, info_only=info_only)
def netease_song_download(song, output_dir='.', info_only=False):
title = "%s. %s" % (song['position'], song['name'])
def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""):
title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
songNet = 'p' + song['mp3Url'].split('/')[2][1:]
if 'hMusic' in song and song['hMusic'] != None:

View File

@ -7,17 +7,62 @@ from .qie import download as qieDownload
from urllib.parse import urlparse,parse_qs
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
content = get_html(api)
output_json = json.loads(match1(content, r'QZOutputJson=(.*)')[:-1])
url = output_json['vl']['vi'][0]['ul']['ui'][0]['url']
info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3%2E2%2E19%2E333&platform=11&defnpayver=1&vid=' + vid
info = get_html(info_api)
video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
parts_vid = video_json['vl']['vi'][0]['vid']
parts_ti = video_json['vl']['vi'][0]['ti']
parts_prefix = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
parts_formats = video_json['fl']['fi']
# Find the best quality:
# only fhd (1080p) and shd (720p) are considered here;
# 480p usually comes as a single file and is downloaded as the fallback.
best_quality = ''
for part_format in parts_formats:
if part_format['name'] == 'fhd':
best_quality = 'fhd'
break
if part_format['name'] == 'shd':
best_quality = 'shd'
for part_format in parts_formats:
if (not best_quality == '') and (not part_format['name'] == best_quality):
continue
part_format_id = part_format['id']
part_format_sl = part_format['sl']
if part_format_sl == 0:
part_urls= []
total_size = 0
try:
# For fhd (1080p), every part is about 100 MB and 6 minutes long;
# trying up to 100 parts caps the longest single video at about 10 hours.
for part in range(1,100):
filename = vid + '.p' + str(part_format_id % 1000) + '.' + str(part) + '.mp4'
key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format=%s&vid=%s&filename=%s" % (part_format_id, parts_vid, filename)
#print(filename)
#print(key_api)
part_info = get_html(key_api)
key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
#print(key_json)
vkey = key_json['key']
url = '%s/%s?vkey=%s' % (parts_prefix, filename, vkey)
part_urls.append(url)
_, ext, size = url_info(url, faker=True)
total_size += size
except:
pass
print_info(site_info, parts_ti, ext, total_size)
if not info_only:
download_urls(part_urls, parts_ti, ext, total_size, output_dir=output_dir, merge=merge)
else:
fvkey = output_json['vl']['vi'][0]['fvkey']
mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None)
if mp4:
mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4'
else:
mp4 = output_json['vl']['vi'][0]['fn']
url = '%s/%s?vkey=%s' % ( url, mp4, fvkey )
url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey )
_, ext, size = url_info(url, faker=True)
print_info(site_info, title, ext, size)

View File

@ -51,11 +51,11 @@ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwa
yixia_download_by_scid = yixia_miaopai_download_by_scid
site_info = "Yixia Miaopai"
if re.match(r'http://www.miaopai.com/show/channel/\w+', url): #PC
if re.match(r'http://www.miaopai.com/show/channel/.+', url): #PC
scid = match1(url, r'http://www.miaopai.com/show/channel/(.+)\.htm')
elif re.match(r'http://www.miaopai.com/show/\w+', url): #PC
elif re.match(r'http://www.miaopai.com/show/.+', url): #PC
scid = match1(url, r'http://www.miaopai.com/show/(.+)\.htm')
elif re.match(r'http://m.miaopai.com/show/channel/\w+', url): #Mobile
elif re.match(r'http://m.miaopai.com/show/channel/.+', url): #Mobile
scid = match1(url, r'http://m.miaopai.com/show/channel/(.+)\.htm')
elif 'xiaokaxiu.com' in hostname: #Xiaokaxiu

View File

@ -314,9 +314,6 @@ class Youku(VideoExtractor):
q = q
)
ksegs += [i['server'] for i in json.loads(get_content(u))]
if (parse_host(ksegs[len(ksegs)-1])[0] == "vali.cp31.ott.cibntv.net"):
ksegs.pop(len(ksegs)-1)
except error.HTTPError as e:
# Use fallback stream data in case of HTTP 404
log.e('[Error] ' + str(e))

View File

@ -155,6 +155,8 @@ class YouTube(VideoExtractor):
try:
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
self.html5player = 'https:' + ytplayer_config['assets']['js']
# Workaround: get_video_info returns a bad 's' (signature). Why?
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
except:
self.html5player = None
@ -236,7 +238,7 @@ class YouTube(VideoExtractor):
start = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
m, s = divmod(finish, 60); h, m = divmod(m, 60)
finish = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
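# e.g. finish == 75.5 becomes '00:01:15,500' (SubRip wants a comma before the milliseconds)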
content = text.firstChild.nodeValue
content = unescape_html(text.firstChild.nodeValue)
srt += '%s\n' % str(seq)
srt += '%s --> %s\n' % (start, finish)

View File

@ -125,7 +125,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
params.append(output + '.txt')
params += ['-c', 'copy', output]
params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
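# the aac_adtstoasc bitstream filter converts ADTS-framed AAC into the raw form the MP4 container expects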
subprocess.check_call(params)
os.remove(output + '.txt')
@ -212,15 +212,6 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'):
if not (output_dir == '.'):
output = output_dir + '/' + output
ffmpeg_params = []
#should these exist...
if params is not None:
if len(params) > 0:
for k, v in params:
ffmpeg_params.append(k)
ffmpeg_params.append(v)
print('Downloading streaming content with FFmpeg, press q to stop recording...')
ffmpeg_params = [FFMPEG] + ['-y', '-re', '-i']
ffmpeg_params.append(files) #not the same here!!!!
@ -230,6 +221,12 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'):
else:
ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc']
if params is not None:
if len(params) > 0:
for k, v in params:
ffmpeg_params.append(k)
ffmpeg_params.append(v)
ffmpeg_params.append(output)
print(' '.join(ffmpeg_params))

View File

@ -10,6 +10,7 @@ def legitimize(text, os=platform.system()):
text = text.translate({
0: None,
ord('/'): '-',
ord('|'): '-',
})
if os == 'Windows':
@ -20,7 +21,6 @@ def legitimize(text, os=platform.system()):
ord('*'): '-',
ord('?'): '-',
ord('\\'): '-',
ord('|'): '-',
ord('\"'): '\'',
# Reserved in Windows VFAT
ord('+'): '-',

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
__version__ = '0.4.555'
__version__ = '0.4.595'

View File

@ -21,9 +21,6 @@ class YouGetTests(unittest.TestCase):
def test_mixcloud(self):
mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)
def test_vimeo(self):
vimeo.download("http://vimeo.com/56810854", info_only=True)
def test_youtube(self):
youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)