From fb97d7d0c3a4b5c4db28b2f08dee82faa914b7c4 Mon Sep 17 00:00:00 2001 From: Bochun Bai Date: Mon, 3 Oct 2016 00:18:50 +0800 Subject: [PATCH 01/29] Tencent Video use best quality Prefer 1080p and 720p if available --- src/you_get/extractors/qq.py | 75 ++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 9ca8af82..f1707527 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -7,22 +7,67 @@ from .qie import download as qieDownload from urllib.parse import urlparse,parse_qs def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): - api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid - content = get_html(api) - output_json = json.loads(match1(content, r'QZOutputJson=(.*)')[:-1]) - url = output_json['vl']['vi'][0]['ul']['ui'][0]['url'] - fvkey = output_json['vl']['vi'][0]['fvkey'] - mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None) - if mp4: - mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4' - else: - mp4 = output_json['vl']['vi'][0]['fn'] - url = '%s/%s?vkey=%s' % ( url, mp4, fvkey ) - _, ext, size = url_info(url, faker=True) + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3%2E2%2E19%2E333&platform=11&defnpayver=1&vid=' + vid + info = get_html(info_api) + video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) + parts_vid = video_json['vl']['vi'][0]['vid'] + parts_ti = video_json['vl']['vi'][0]['ti'] + parts_prefix = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] + parts_formats = video_json['fl']['fi'] + # find best quality + # only looking for fhd(1080p) and shd(720p) here. + # 480p usually come with a single file, will be downloaded as fallback. + best_quality = '' + for part_format in parts_formats: + if part_format['name'] == 'fhd': + best_quality = 'fhd' + break - print_info(site_info, title, ext, size) - if not info_only: - download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) + if part_format['name'] == 'shd': + best_quality = 'shd' + + for part_format in parts_formats: + if (not best_quality == '') and (not part_format['name'] == best_quality): + continue + part_format_id = part_format['id'] + part_format_sl = part_format['sl'] + if part_format_sl == 0: + part_urls= [] + total_size = 0 + try: + # For fhd(1080p), every part is about 100M and 6 minutes + # try 100 parts here limited download longest single video of 10 hours. + for part in range(1,100): + filename = vid + '.p' + str(part_format_id % 1000) + '.' 
+ str(part) + '.mp4' + key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format=%s&vid=%s&filename=%s" % (part_format_id, parts_vid, filename) + #print(filename) + #print(key_api) + part_info = get_html(key_api) + key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1]) + #print(key_json) + vkey = key_json['key'] + url = '%s/%s?vkey=%s' % (parts_prefix, filename, vkey) + part_urls.append(url) + _, ext, size = url_info(url, faker=True) + total_size += size + except: + pass + print_info(site_info, parts_ti, ext, total_size) + if not info_only: + download_urls(part_urls, parts_ti, ext, total_size, output_dir=output_dir, merge=merge) + else: + fvkey = output_json['vl']['vi'][0]['fvkey'] + mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None) + if mp4: + mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4' + else: + mp4 = output_json['vl']['vi'][0]['fn'] + url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey ) + _, ext, size = url_info(url, faker=True) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): From 3b3e5cfe38fde46afe0ebf2717802c44e8028706 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 5 Oct 2016 14:38:02 +0200 Subject: [PATCH 02/29] update README (close #1422) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a99e57fd..1b653308 100644 --- a/README.md +++ b/README.md @@ -339,6 +339,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | **Tumblr** | |✓|✓|✓| | TED | |✓| | | | SoundCloud | | | |✓| +| SHOWROOM | |✓| | | | Pinterest | | |✓| | | MusicPlayOn | |✓| | | | MTV81 | |✓| | | From c9ffae970e3ebc6131c4b4a6593320ab6fe65675 Mon Sep 17 00:00:00 2001 From: Vicent Tsai Date: Mon, 10 Oct 2016 00:13:01 +0800 Subject: [PATCH 03/29] [AcFun] fix #1429 --- src/you_get/extractors/acfun.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 4638cb8f..87e005fb 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -73,14 +73,14 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) html = get_html(url) - title = r1(r'
([^<>]+)<', html) + title = r1(r'data-title="([^"]+)"', html) title = unescape_html(title) title = escape_file_path(title) assert title - video = re.search('data-vid="(\d+)"\s*data-scode=""[^<]*title="([^"]+)"', html) - vid = video.group(1) - title = title + ' - ' + video.group(2) + vid = r1('data-vid="(\d+)"', html) + up = r1('data-name="([^"]+)"', html) + title = title + ' - ' + up acfun_download_by_vid(vid, title, output_dir=output_dir, merge=merge, From 21fc4d4a0999e70b7886d3abd0cfaa6e4244dbae Mon Sep 17 00:00:00 2001 From: chinat Date: Mon, 10 Oct 2016 11:28:45 +0800 Subject: [PATCH 04/29] translate char | to - in filename for ffmpeg concat --- src/you_get/util/fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index 36e0b29d..4f415bf0 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -10,6 +10,7 @@ def legitimize(text, os=platform.system()): text = text.translate({ 0: None, ord('/'): '-', + ord('|'): '-', }) if os == 'Windows': @@ -20,7 +21,6 @@ def legitimize(text, os=platform.system()): ord('*'): '-', ord('?'): '-', ord('\\'): '-', - ord('|'): '-', ord('\"'): '\'', # Reserved in Windows VFAT ord('+'): '-', From e2f86641ab5354fa103f29709a21ffdb4cf574f8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 14 Oct 2016 20:32:05 +0200 Subject: [PATCH 05/29] update README: use https --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1b653308..b994ebd1 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Interested? [Install it](#installation) now and [get started by examples](#getti Are you a Python programmer? Then check out [the source](https://github.com/soimort/you-get) and fork it! -![](http://i.imgur.com/GfthFAz.png) +![](https://i.imgur.com/GfthFAz.png) ## Installation From 669d7b558655f4ffa530ad24573936f90119ced2 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 19 Oct 2016 20:47:17 +0200 Subject: [PATCH 06/29] [youtube] unescape HTML entities, fix #1462 --- src/you_get/extractors/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 0ef390ed..33e3923e 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -236,7 +236,7 @@ class YouTube(VideoExtractor): start = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',') m, s = divmod(finish, 60); h, m = divmod(m, 60) finish = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',') - content = text.firstChild.nodeValue + content = unescape_html(text.firstChild.nodeValue) srt += '%s\n' % str(seq) srt += '%s --> %s\n' % (start, finish) From 95a8d1e8afdd6df60b3e87ac739cf836b0d0d837 Mon Sep 17 00:00:00 2001 From: Cheng Gu Date: Thu, 20 Oct 2016 14:19:45 +0800 Subject: [PATCH 07/29] [huomaotv] add huomao.com suppport --- src/you_get/common.py | 1 + src/you_get/extractors/huomaotv.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 src/you_get/extractors/huomaotv.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 7f76aaac..3a60bf12 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -27,6 +27,7 @@ SITES = { 'google' : 'google', 'heavy-music' : 'heavymusic', 'huaban' : 'huaban', + 'huomao' : 'huomaotv', 'iask' : 'sina', 'ifeng' : 'ifeng', 'imgur' : 'imgur', diff --git a/src/you_get/extractors/huomaotv.py b/src/you_get/extractors/huomaotv.py new file mode 100644 index 
00000000..4852ff06 --- /dev/null +++ b/src/you_get/extractors/huomaotv.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +__all__ = ['huomaotv_download'] + +from ..common import * + + +def get_mobile_room_url(room_id): + return 'http://www.huomao.com/mobile/mob_live?cid=%s' % room_id + + +def get_m3u8_url(stream_id): + return 'http://live-ws.huomaotv.cn/live/%s/playlist.m3u8' % stream_id + + +def huomaotv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + room_id_pattern = r'huomao.com/(\d+)' + room_id = match1(url, room_id_pattern) + html = get_content(get_mobile_room_url(room_id)) + + stream_id_pattern = r'id="html_stream" value="(\w+)"' + stream_id = match1(html, stream_id_pattern) + + m3u8_url = get_m3u8_url(stream_id) + + title = match1(html, r'([^<]{1,9999})') + + print_info(site_info, title, 'm3u8', float('inf')) + + if not info_only: + download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge) + + +site_info = 'huomao.com' +download = huomaotv_download +download_playlist = playlist_not_supported('huomao') From 78fa1161310216e102f84f950b01d43c08899550 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Fri, 14 Oct 2016 00:03:56 -0600 Subject: [PATCH 08/29] Add POST method to common.py --- src/you_get/common.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 6c65bd49..3e152732 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -327,6 +327,45 @@ def get_content(url, headers={}, decoded=True): return data +def post_content(url, headers={}, post_data={}, decoded=True): + """Post the content of a URL via sending a HTTP POST request. + + Args: + url: A URL. + headers: Request headers used by the client. + decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type. + + Returns: + The content as a string. 
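+
+        Example (hypothetical; the endpoint and form fields below are
+        made-up placeholders, not a real API):
+
+            html = post_content('http://example.com/login',
+                                headers=fake_headers,
+                                post_data={'user': 'me', 'pwd': 'secret'})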
+ """ + + logging.debug('post_content: %s \n post_data: %s' % (url, post_data)) + + req = request.Request(url, headers=headers) + if cookies: + cookies.add_cookie_header(req) + req.headers.update(req.unredirected_hdrs) + post_data_enc = bytes(parse.urlencode(post_data), 'utf-8') + response = request.urlopen(req, data = post_data_enc) + data = response.read() + + # Handle HTTP compression for gzip and deflate (zlib) + content_encoding = response.getheader('Content-Encoding') + if content_encoding == 'gzip': + data = ungzip(data) + elif content_encoding == 'deflate': + data = undeflate(data) + + # Decode the response body + if decoded: + charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)') + if charset is not None: + data = data.decode(charset) + else: + data = data.decode('utf-8') + + return data + def url_size(url, faker = False, headers = {}): if faker: response = request.urlopen(request.Request(url, headers = fake_headers), None) From aef17dcb9926f9e2d056a92796dcf58537a42d63 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Fri, 14 Oct 2016 00:03:56 -0600 Subject: [PATCH 09/29] Add Baidu Cloud support --- src/you_get/extractors/baidu.py | 225 ++++++++++++++++++++++++++++---- 1 file changed, 197 insertions(+), 28 deletions(-) mode change 100755 => 100644 src/you_get/extractors/baidu.py diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py old mode 100755 new mode 100644 index aa9caa0c..d5efaf0b --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -7,8 +7,10 @@ from ..common import * from .embed import * from .universal import * + def baidu_get_song_data(sid): - data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data'] + data = json.loads(get_html( + 'http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker=True))['data'] if data['xcode'] != '': # inside china mainland @@ -17,22 +19,28 @@ def baidu_get_song_data(sid): # outside china mainland return None + def baidu_get_song_url(data): return data['songLink'] + def baidu_get_song_artist(data): return data['artistName'] + def baidu_get_song_album(data): return data['albumName'] + def baidu_get_song_title(data): return data['songName'] + def baidu_get_song_lyric(data): lrc = data['lrcLink'] return None if lrc is '' else "http://music.baidu.com%s" % lrc + def baidu_download_song(sid, output_dir='.', merge=True, info_only=False): data = baidu_get_song_data(sid) if data is not None: @@ -51,7 +59,8 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False): type, ext, size = url_info(url, faker=True) print_info(site_info, title, type, size) if not info_only: - download_urls([url], file_name, ext, size, output_dir, merge=merge, faker=True) + download_urls([url], file_name, ext, size, + output_dir, merge=merge, faker=True) try: type, ext, size = url_info(lrc, faker=True) @@ -61,12 +70,14 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False): except: pass -def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False): - html = get_html('http://music.baidu.com/album/%s' % aid, faker = True) + +def baidu_download_album(aid, output_dir='.', merge=True, info_only=False): + html = get_html('http://music.baidu.com/album/%s' % aid, faker=True) album_name = r1(r'
(.+?)<\/h2>', html) artist = r1(r'', html) output_dir = '%s/%s - %s' % (output_dir, artist, album_name) - ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids'] + ids = json.loads(r1(r'', + html).replace('"', '').replace(';', '"'))['ids'] track_nr = 1 for id in ids: song_data = baidu_get_song_data(id) @@ -75,38 +86,29 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False) song_lrc = baidu_get_song_lyric(song_data) file_name = '%02d.%s' % (track_nr, song_title) - type, ext, size = url_info(song_url, faker = True) + type, ext, size = url_info(song_url, faker=True) print_info(site_info, song_title, type, size) if not info_only: - download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True) + download_urls([song_url], file_name, ext, size, + output_dir, merge=merge, faker=True) if song_lrc: - type, ext, size = url_info(song_lrc, faker = True) + type, ext, size = url_info(song_lrc, faker=True) print_info(site_info, song_title, type, size) if not info_only: - download_urls([song_lrc], file_name, ext, size, output_dir, faker = True) + download_urls([song_lrc], file_name, ext, + size, output_dir, faker=True) track_nr += 1 -def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs): - if re.match(r'http://imgsrc.baidu.com', url): - universal_download(url, output_dir, merge=merge, info_only=info_only) - return - elif re.match(r'http://pan.baidu.com', url): - html = get_html(url) +def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): - title = r1(r'server_filename="([^"]+)"', html) - if len(title.split('.')) > 1: - title = ".".join(title.split('.')[:-1]) - - real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/') - type, ext, size = url_info(real_url, faker = True) - - print_info(site_info, title, ext, size) + if re.match(r'http://pan.baidu.com', url): + real_url, title, ext, size = baidu_pan_download(url) if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) - + download_urls([real_url], title, ext, size, + output_dir, url, merge=merge, faker=True) elif re.match(r'http://music.baidu.com/album/\d+', url): id = r1(r'http://music.baidu.com/album/(\d+)', url) baidu_download_album(id, output_dir, merge, info_only) @@ -124,17 +126,20 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info html = get_html(url) title = r1(r'title:"([^"]+)"', html) - items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) + items = re.findall( + r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i for i in set(items)] # handle albums kw = r1(r'kw=([^&]+)', html) or r1(r"kw:'([^']+)'", html) tid = r1(r'tid=(\d+)', html) or r1(r"tid:'([^']+)'", html) - album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid) + album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % ( + kw, tid) album_info = json.loads(get_content(album_url)) for i in album_info['data']['pic_list']: - urls.append('http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg') + urls.append( + 'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg') ext = 'jpg' size = float('Inf') @@ -144,6 +149,170 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info download_urls(urls, title, ext, size, output_dir=output_dir, merge=False) + +def baidu_pan_download(url): + 
errno_patt = r'errno":([^"]+),' + refer_url = "" + fake_headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'UTF-8,*;q=0.5', + 'Accept-Encoding': 'gzip,deflate,sdch', + 'Accept-Language': 'en-US,en;q=0.8', + 'Host': 'pan.baidu.com', + 'Origin': 'http://pan.baidu.com', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36', + 'Referer': refer_url + } + if cookies: + print('Use user specified cookies') + else: + print('Generating cookies...') + fake_headers['Cookie'] = baidu_pan_gen_cookies(url) + refer_url = "http://pan.baidu.com" + html = get_content(url, fake_headers, decoded=True) + isprotected = False + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( + html) + if sign == None: + if re.findall(r'\baccess-code\b', html): + isprotected = True + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share( + url) + # raise NotImplementedError("Password required!") + if isprotected != True: + raise AssertionError("Share not found or canceled: %s" % url) + if bdstoken == None: + bdstoken = "" + if isprotected != True: + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( + html) + request_url = "http://pan.baidu.com/api/sharedownload?sign=%s×tamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % ( + sign, timestamp, bdstoken, appid) + refer_url = url + post_data = { + 'encrypt': 0, + 'product': 'share', + 'uk': uk, + 'primaryid': primary_id, + 'fid_list': '[' + fs_id + ']' + } + if isprotected == True: + post_data['sekey'] = psk + response_content = post_content(request_url, fake_headers, post_data, True) + errno = match1(response_content, errno_patt) + if errno != "0": + raise AssertionError( + "Server refused to provide download link! 
(Errno:%s)" % errno) + real_url = r1(r'dlink":"([^"]+)"', response_content).replace('\\/', '/') + title = r1(r'server_filename":"([^"]+)"', response_content) + assert real_url + type, ext, size = url_info(real_url, faker=True) + title_wrapped = json.loads('{"wrapper":"%s"}' % title) + title = title_wrapped['wrapper'] + logging.debug(real_url) + print_info(site_info, title, ext, size) + print('Hold on...') + time.sleep(5) + return real_url, title, ext, size + + +def baidu_pan_parse(html): + sign_patt = r'sign":"([^"]+)"' + timestamp_patt = r'timestamp":([^"]+),' + appid_patt = r'app_id":"([^"]+)"' + bdstoken_patt = r'bdstoken":"([^"]+)"' + fs_id_patt = r'fs_id":([^"]+),' + uk_patt = r'uk":([^"]+),' + errno_patt = r'errno":([^"]+),' + primary_id_patt = r'shareid":([^"]+),' + sign = match1(html, sign_patt) + timestamp = match1(html, timestamp_patt) + appid = match1(html, appid_patt) + bdstoken = match1(html, bdstoken_patt) + fs_id = match1(html, fs_id_patt) + uk = match1(html, uk_patt) + primary_id = match1(html, primary_id_patt) + return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk + + +def baidu_pan_gen_cookies(url, post_data=None): + from http import cookiejar + cookiejar = cookiejar.CookieJar() + opener = request.build_opener(request.HTTPCookieProcessor(cookiejar)) + resp = opener.open('http://pan.baidu.com') + if post_data != None: + resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8')) + return cookjar2hdr(cookiejar) + + +def baidu_pan_protected_share(url): + print('This share is protected by password!') + inpwd = input('Please provide unlock password: ') + inpwd = inpwd.replace(' ', '').replace('\t', '') + print('Please wait...') + post_pwd = { + 'pwd': inpwd, + 'vcode': None, + 'vstr': None + } + from http import cookiejar + import time + cookiejar = cookiejar.CookieJar() + opener = request.build_opener(request.HTTPCookieProcessor(cookiejar)) + resp = opener.open('http://pan.baidu.com') + resp = opener.open(url) + init_url = resp.geturl() + verify_url = 'http://pan.baidu.com/share/verify?%s&t=%s&channel=chunlei&clienttype=0&web=1' % ( + init_url.split('?', 1)[1], int(time.time())) + refer_url = init_url + fake_headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'UTF-8,*;q=0.5', + 'Accept-Encoding': 'gzip,deflate,sdch', + 'Accept-Language': 'en-US,en;q=0.8', + 'Host': 'pan.baidu.com', + 'Origin': 'http://pan.baidu.com', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36', + 'Referer': refer_url + } + opener.addheaders = dict2triplet(fake_headers) + pwd_resp = opener.open(verify_url, bytes( + parse.urlencode(post_pwd), 'utf-8')) + pwd_resp_str = ungzip(pwd_resp.read()).decode('utf-8') + pwd_res = json.loads(pwd_resp_str) + if pwd_res['errno'] != 0: + raise AssertionError( + 'Server returned an error: %s (Incorrect password?)' % pwd_res['errno']) + pg_resp = opener.open('http://pan.baidu.com/share/link?%s' % + init_url.split('?', 1)[1]) + content = ungzip(pg_resp.read()).decode('utf-8') + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( + content) + psk = query_cookiejar(cookiejar, 'BDCLND') + psk = parse.unquote(psk) + fake_headers['Cookie'] = cookjar2hdr(cookiejar) + return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk + + +def cookjar2hdr(cookiejar): + cookie_str = '' + for i in cookiejar: + cookie_str = cookie_str + i.name + '=' + i.value + ';' + return cookie_str[:-1] 
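+# A sketch of what cookjar2hdr produces, with made-up cookie names: a jar
+# holding BDUSS=abc and STOKEN=def serializes to 'BDUSS=abc;STOKEN=def' --
+# name=value pairs joined by ';', trailing separator stripped, ready for use
+# as a Cookie request header.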
+ + +def query_cookiejar(cookiejar, name): + for i in cookiejar: + if i.name == name: + return i.value + + +def dict2triplet(dictin): + out_triplet = [] + for i in dictin: + out_triplet.append((i, dictin[i])) + return out_triplet + site_info = "Baidu.com" download = baidu_download download_playlist = playlist_not_supported("baidu") From 4bbafeb9e48e76b7b622f2133685905b362a9096 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Thu, 20 Oct 2016 13:09:30 -0600 Subject: [PATCH 10/29] icourse: add supprt --- src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/icourses.py | 129 +++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 src/you_get/extractors/icourses.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 948b0ca2..ca867673 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -29,6 +29,7 @@ SITES = { 'huaban' : 'huaban', 'huomao' : 'huomaotv', 'iask' : 'sina', + 'icourses' : 'icourses', 'ifeng' : 'ifeng', 'imgur' : 'imgur', 'in' : 'alive', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index e69bc2fd..61b6a0d1 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -24,6 +24,7 @@ from .funshion import * from .google import * from .heavymusic import * from .huaban import * +from .icourses import * from .ifeng import * from .imgur import * from .infoq import * diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py new file mode 100644 index 00000000..5f9b8edf --- /dev/null +++ b/src/you_get/extractors/icourses.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +from ..common import * +from urllib import parse +import xml.etree.ElementTree as ET +import datetime +import hashlib +import base64 +import logging +from urllib import error +import re + +__all__ = ['icourses_download'] + + +def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs): + title, real_url = icourses_cn_url_parser( + url, info_only=info_only, **kwargs) + if real_url is not None: + for tries in range(0, 3): + try: + _, type_, size = url_info(real_url, faker=True) + break + except error.HTTPError: + logging.warning('Failed to fetch the video file! 
Retrying...') + title, real_url = icourses_cn_url_parser(url) + print_info(site_info, title, type_, size) + if not info_only: + download_urls([real_url], title, 'flv', + total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True) + + +def icourses_playlist_download(url, **kwargs): + import random + from time import sleep + html = get_content(url) + page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)' + video_js_number = r'changeforvideo\((.*?)\)' + fs_flag = r'' + page_navi_vars = re.search(pattern=page_type_patt, string=html) + dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format( + page_navi_vars.group(2), page_navi_vars.group(1)) + html = get_content(dummy_page) + fs_status = match1(html, fs_flag) + video_list = re.findall(pattern=video_js_number, string=html) + for video in video_list: + video_args = video.replace('\'', '').split(',') + video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format( + video_args[0], video_args[1], fs_status or '1') + sleep(random.Random().randint(0, 5)) # Prevent from blockage + icourses_download(url=video_url, **kwargs) + + +def icourses_cn_url_parser(url, **kwargs): + PLAYER_BASE_VER = '150606-1' + ENCRYPT_MOD_VER = '151020' + ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this... + html = get_content(url) + if re.search(pattern=r'showSectionNode\(.*\)', string=html): + logging.warning('Switching to playlist mode!') + return icourses_playlist_download(url, **kwargs) + flashvars_patt = r'var\ flashvars\=((.|\n)*)};' + server_time_patt = r'MPlayer.swf\?v\=(\d+)' + uuid_patt = r'uuid:(\d+)' + other_args_patt = r'other:"(.*)"' + res_url_patt = r'IService:\'([^\']+)' + title_a_patt = r'
(.*?)'
+    title_b_patt = r'
((.|\n)*?)
' + title_a = match1(html, title_a_patt).strip() + title_b = match1(html, title_b_patt).strip() + title = title_a + title_b # WIP, FIXME + title = re.sub('( +|\n|\t|\r|\ \;)', '', + unescape_html(title).replace(' ', '')) + server_time = match1(html, server_time_patt) + flashvars = match1(html, flashvars_patt) + uuid = match1(flashvars, uuid_patt) + other_args = match1(flashvars, other_args_patt) + res_url = match1(flashvars, res_url_patt) + url_parts = {'v': server_time, 'other': other_args, + 'uuid': uuid, 'IService': res_url} + req_url = '%s?%s' % (res_url, parse.urlencode(url_parts)) + logging.debug('Requesting video resource location...') + xml_resp = get_html(req_url) + xml_obj = ET.fromstring(xml_resp) + logging.debug('The result was {}'.format(xml_obj.get('status'))) + if xml_obj.get('status') != 'success': + raise ValueError('Server returned error!') + common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play', + 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'), + 'start': 0} + media_host = xml_obj.find(".//*[@name='host']").text + media_url = media_host + xml_obj.find(".//*[@name='url']").text + # This is what they called `SSLModule`... But obviously, just a kind of + # encryption, takes absolutely no effect in protecting data intergrity + if xml_obj.find(".//*[@name='ssl']").text != 'true': + logging.debug('The encryption mode is disabled') + # when the so-called `SSLMode` is not activated, the parameters, `h` + # and `p` can be found in response + arg_h = xml_obj.find(".//*[@name='h']").text + assert arg_h + arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER + url_args = common_args.copy() + url_args.update({'h': arg_h, 'r': arg_r}) + final_url = '{}?{}'.format( + media_url, parse.urlencode(url_args)) + return title, final_url + # when the `SSLMode` is activated, we need to receive the timestamp and the + # time offset (?) value from the server + logging.debug('The encryption mode is in effect') + ssl_callback = get_html('{}/ssl/ssl.shtml'.format(media_host)).split(',') + ssl_timestamp = int(datetime.datetime.strptime( + ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0])) + sign_this = ENCRYPT_SALT + \ + parse.urlparse(media_url).path + str(ssl_timestamp) + arg_h = base64.b64encode(hashlib.md5(bytes(sign_this, 'utf-8')).digest()) + # Post-processing, may subject to change, so leaving this alone... 
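+    # (Concretely: strip the '=' padding, then map '+' to '-' and '/' to '_',
+    #  converting standard base64 output into the URL-safe alphabet.)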
+ arg_h = arg_h.decode('utf-8').strip('=').replace('+', + '-').replace('/', '_') + arg_r = ssl_timestamp + url_args = common_args.copy() + url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER}) + final_url = '{}?{}'.format( + media_url, parse.urlencode(url_args)) + logging.debug('Concat`ed URL: {}'.format(final_url)) + return title, final_url + + +site_info = 'icourses.cn' +download = icourses_download +download_playlist = icourses_playlist_download From 5351121186c2c8c94bc7b24419ea5ca305582462 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 25 Oct 2016 12:52:30 -0600 Subject: [PATCH 11/29] icouses: Code clean up --- src/you_get/extractors/icourses.py | 197 +++++++++++++++-------------- 1 file changed, 105 insertions(+), 92 deletions(-) diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py index 5f9b8edf..5c2f8cda 100644 --- a/src/you_get/extractors/icourses.py +++ b/src/you_get/extractors/icourses.py @@ -13,8 +13,9 @@ __all__ = ['icourses_download'] def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs): - title, real_url = icourses_cn_url_parser( - url, info_only=info_only, **kwargs) + icourses_parser = ICousesExactor(url=url) + real_url = icourses_parser.icourses_cn_url_parser(**kwargs) + title = icourses_parser.title if real_url is not None: for tries in range(0, 3): try: @@ -22,108 +23,120 @@ def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs): break except error.HTTPError: logging.warning('Failed to fetch the video file! Retrying...') - title, real_url = icourses_cn_url_parser(url) + real_url = icourses_parser.icourses_cn_url_parser() + title = icourses_parser.title print_info(site_info, title, type_, size) if not info_only: download_urls([real_url], title, 'flv', total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True) -def icourses_playlist_download(url, **kwargs): - import random - from time import sleep - html = get_content(url) - page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)' - video_js_number = r'changeforvideo\((.*?)\)' - fs_flag = r'' - page_navi_vars = re.search(pattern=page_type_patt, string=html) - dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format( - page_navi_vars.group(2), page_navi_vars.group(1)) - html = get_content(dummy_page) - fs_status = match1(html, fs_flag) - video_list = re.findall(pattern=video_js_number, string=html) - for video in video_list: - video_args = video.replace('\'', '').split(',') - video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format( - video_args[0], video_args[1], fs_status or '1') - sleep(random.Random().randint(0, 5)) # Prevent from blockage - icourses_download(url=video_url, **kwargs) +# Why not using VideoExtractor: This site needs specical download method +class ICousesExactor(object): + def __init__(self, url): + self.url = url + self.title = '' + return -def icourses_cn_url_parser(url, **kwargs): - PLAYER_BASE_VER = '150606-1' - ENCRYPT_MOD_VER = '151020' - ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this... 
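    # (How the salt enters the signature, per the code further down: the 'h'
    #  parameter is the URL-safe base64 of md5(ENCRYPT_SALT + media URL path
    #  + timestamp).)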
- html = get_content(url) - if re.search(pattern=r'showSectionNode\(.*\)', string=html): - logging.warning('Switching to playlist mode!') - return icourses_playlist_download(url, **kwargs) - flashvars_patt = r'var\ flashvars\=((.|\n)*)};' - server_time_patt = r'MPlayer.swf\?v\=(\d+)' - uuid_patt = r'uuid:(\d+)' - other_args_patt = r'other:"(.*)"' - res_url_patt = r'IService:\'([^\']+)' - title_a_patt = r'
(.*?)' - title_b_patt = r'
((.|\n)*?)
' - title_a = match1(html, title_a_patt).strip() - title_b = match1(html, title_b_patt).strip() - title = title_a + title_b # WIP, FIXME - title = re.sub('( +|\n|\t|\r|\ \;)', '', - unescape_html(title).replace(' ', '')) - server_time = match1(html, server_time_patt) - flashvars = match1(html, flashvars_patt) - uuid = match1(flashvars, uuid_patt) - other_args = match1(flashvars, other_args_patt) - res_url = match1(flashvars, res_url_patt) - url_parts = {'v': server_time, 'other': other_args, - 'uuid': uuid, 'IService': res_url} - req_url = '%s?%s' % (res_url, parse.urlencode(url_parts)) - logging.debug('Requesting video resource location...') - xml_resp = get_html(req_url) - xml_obj = ET.fromstring(xml_resp) - logging.debug('The result was {}'.format(xml_obj.get('status'))) - if xml_obj.get('status') != 'success': - raise ValueError('Server returned error!') - common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play', - 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'), - 'start': 0} - media_host = xml_obj.find(".//*[@name='host']").text - media_url = media_host + xml_obj.find(".//*[@name='url']").text - # This is what they called `SSLModule`... But obviously, just a kind of - # encryption, takes absolutely no effect in protecting data intergrity - if xml_obj.find(".//*[@name='ssl']").text != 'true': - logging.debug('The encryption mode is disabled') - # when the so-called `SSLMode` is not activated, the parameters, `h` - # and `p` can be found in response - arg_h = xml_obj.find(".//*[@name='h']").text - assert arg_h - arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER + def icourses_playlist_download(self, **kwargs): + import random + from time import sleep + html = get_content(url) + page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)' + video_js_number = r'changeforvideo\((.*?)\)' + fs_flag = r'' + page_navi_vars = re.search(pattern=page_type_patt, string=html) + dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format( + page_navi_vars.group(2), page_navi_vars.group(1)) + html = get_content(dummy_page) + fs_status = match1(html, fs_flag) + video_list = re.findall(pattern=video_js_number, string=html) + for video in video_list: + video_args = video.replace('\'', '').split(',') + video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format( + video_args[0], video_args[1], fs_status or '1') + sleep(random.Random().randint(0, 5)) # Prevent from blockage + icourses_download(video_url, **kwargs) + + def icourses_cn_url_parser(self, **kwargs): + PLAYER_BASE_VER = '150606-1' + ENCRYPT_MOD_VER = '151020' + ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this... + html = get_content(self.url) + if re.search(pattern=r'showSectionNode\(.*\)', string=html): + logging.warning('Switching to playlist mode!') + return self.icourses_playlist_download(**kwargs) + flashvars_patt = r'var\ flashvars\=((.|\n)*)};' + server_time_patt = r'MPlayer.swf\?v\=(\d+)' + uuid_patt = r'uuid:(\d+)' + other_args_patt = r'other:"(.*)"' + res_url_patt = r'IService:\'([^\']+)' + title_a_patt = r'
(.*?)' + title_b_patt = r'
((.|\n)*?)
' + title_a = match1(html, title_a_patt).strip() + title_b = match1(html, title_b_patt).strip() + title = title_a + title_b # WIP, FIXME + title = re.sub('( +|\n|\t|\r|\ \;)', '', + unescape_html(title).replace(' ', '')) + server_time = match1(html, server_time_patt) + flashvars = match1(html, flashvars_patt) + uuid = match1(flashvars, uuid_patt) + other_args = match1(flashvars, other_args_patt) + res_url = match1(flashvars, res_url_patt) + url_parts = {'v': server_time, 'other': other_args, + 'uuid': uuid, 'IService': res_url} + req_url = '%s?%s' % (res_url, parse.urlencode(url_parts)) + logging.debug('Requesting video resource location...') + xml_resp = get_html(req_url) + xml_obj = ET.fromstring(xml_resp) + logging.debug('The result was {}'.format(xml_obj.get('status'))) + if xml_obj.get('status') != 'success': + raise ValueError('Server returned error!') + common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play', + 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'), + 'start': 0} + media_host = xml_obj.find(".//*[@name='host']").text + media_url = media_host + xml_obj.find(".//*[@name='url']").text + # This is what they called `SSLModule`... But obviously, just a kind of + # encryption, takes absolutely no effect in protecting data intergrity + if xml_obj.find(".//*[@name='ssl']").text != 'true': + logging.debug('The encryption mode is disabled') + # when the so-called `SSLMode` is not activated, the parameters, `h` + # and `p` can be found in response + arg_h = xml_obj.find(".//*[@name='h']").text + assert arg_h + arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER + url_args = common_args.copy() + url_args.update({'h': arg_h, 'r': arg_r}) + final_url = '{}?{}'.format( + media_url, parse.urlencode(url_args)) + self.title = title + return final_url + # when the `SSLMode` is activated, we need to receive the timestamp and the + # time offset (?) value from the server + logging.debug('The encryption mode is in effect') + ssl_callback = get_html( + '{}/ssl/ssl.shtml'.format(media_host)).split(',') + ssl_timestamp = int(datetime.datetime.strptime( + ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0])) + sign_this = ENCRYPT_SALT + \ + parse.urlparse(media_url).path + str(ssl_timestamp) + arg_h = base64.b64encode(hashlib.md5( + bytes(sign_this, 'utf-8')).digest()) + # Post-processing, may subject to change, so leaving this alone... + arg_h = arg_h.decode('utf-8').strip('=').replace('+', + '-').replace('/', '_') + arg_r = ssl_timestamp url_args = common_args.copy() - url_args.update({'h': arg_h, 'r': arg_r}) + url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER}) final_url = '{}?{}'.format( media_url, parse.urlencode(url_args)) - return title, final_url - # when the `SSLMode` is activated, we need to receive the timestamp and the - # time offset (?) value from the server - logging.debug('The encryption mode is in effect') - ssl_callback = get_html('{}/ssl/ssl.shtml'.format(media_host)).split(',') - ssl_timestamp = int(datetime.datetime.strptime( - ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0])) - sign_this = ENCRYPT_SALT + \ - parse.urlparse(media_url).path + str(ssl_timestamp) - arg_h = base64.b64encode(hashlib.md5(bytes(sign_this, 'utf-8')).digest()) - # Post-processing, may subject to change, so leaving this alone... 
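+            # (An equivalent, shorter form, noted in case this is revisited:
+            #  base64.urlsafe_b64encode(hashlib.md5(sign_this.encode('utf-8'))
+            #      .digest()).decode('utf-8').rstrip('=')
+            #  -- urlsafe_b64encode performs the '+'/'-' and '/'/'_' swaps itself.)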
- arg_h = arg_h.decode('utf-8').strip('=').replace('+', - '-').replace('/', '_') - arg_r = ssl_timestamp - url_args = common_args.copy() - url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER}) - final_url = '{}?{}'.format( - media_url, parse.urlencode(url_args)) - logging.debug('Concat`ed URL: {}'.format(final_url)) - return title, final_url + logging.debug('Crafted URL: {}'.format(final_url)) + self.title = title + return final_url site_info = 'icourses.cn' download = icourses_download -download_playlist = icourses_playlist_download +# download_playlist = icourses_playlist_download From ae4e533ec9d28fb1598fb91dfa87ce16cb06bc92 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 25 Oct 2016 14:03:21 -0600 Subject: [PATCH 12/29] common: add dynamic url support for `url_save_chunked` --- src/you_get/common.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 948b0ca2..0f7fd0e3 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -547,7 +547,11 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h os.remove(filepath) # on Windows rename could fail if destination filepath exists os.rename(temp_filepath, filepath) -def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}): +def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore_range=False, refer=None, is_part=False, faker=False, headers={}): + def dyn_update_url(received): + if callable(dyn_callback): + logging.debug('Calling callback %s for new URL from %s' % (dyn_callback.__name__, received)) + return dyn_callback(received) if os.path.exists(filepath): if not force: if not is_part: @@ -585,19 +589,26 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = else: headers = {} if received: - headers['Range'] = 'bytes=' + str(received) + '-' + url = dyn_update_url(received) + if not ignore_range: + headers['Range'] = 'bytes=' + str(received) + '-' if refer: headers['Referer'] = refer - response = request.urlopen(request.Request(url, headers = headers), None) + response = request.urlopen(request.Request(url, headers=headers), None) with open(temp_filepath, open_mode) as output: + this_chunk = received while True: buffer = response.read(1024 * 256) if not buffer: break output.write(buffer) received += len(buffer) + if chunk_size and (received - this_chunk) >= chunk_size: + url = dyn_callback(received) + this_chunk = received + response = request.urlopen(request.Request(url, headers=headers), None) if bar: bar.update_received(len(buffer)) @@ -846,7 +857,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg print() -def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}): +def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs): assert urls if dry_run: print('Real URLs:\n%s\n' % urls) @@ -860,7 +871,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) - if total_size and ext in ('ts'): + if total_size: if not force and os.path.exists(filepath[:-3] + '.mkv'): print('Skipping %s: file already exists' % filepath[:-3] + '.mkv') print() @@ -875,7 +886,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', 
refer=No print('Downloading %s ...' % tr(filename)) filepath = os.path.join(output_dir, filename) parts.append(filepath) - url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers) + url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers, **kwargs) bar.done() if not merge: From 2183448c9098c1abd0e9cf47fa305e3775e1e098 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 25 Oct 2016 14:15:23 -0600 Subject: [PATCH 13/29] icourses: implement fake `keep connection alive` --- src/you_get/extractors/icourses.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py index 5c2f8cda..cb2ff74a 100644 --- a/src/you_get/extractors/icourses.py +++ b/src/you_get/extractors/icourses.py @@ -1,6 +1,8 @@ #!/usr/bin/env python from ..common import * from urllib import parse +import random +from time import sleep import xml.etree.ElementTree as ET import datetime import hashlib @@ -12,23 +14,24 @@ import re __all__ = ['icourses_download'] -def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs): +def icourses_download(url, merge=False, output_dir='.', **kwargs): icourses_parser = ICousesExactor(url=url) real_url = icourses_parser.icourses_cn_url_parser(**kwargs) title = icourses_parser.title if real_url is not None: - for tries in range(0, 3): + for tries in range(0, 5): try: _, type_, size = url_info(real_url, faker=True) break except error.HTTPError: logging.warning('Failed to fetch the video file! Retrying...') + sleep(random.Random().randint(0, 5)) # Prevent from blockage real_url = icourses_parser.icourses_cn_url_parser() title = icourses_parser.title print_info(site_info, title, type_, size) - if not info_only: - download_urls([real_url], title, 'flv', - total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True) + if not kwargs['info_only']: + download_urls_chunked([real_url], title, 'flv', + total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True, ignore_range=True, chunk_size=15000000, dyn_callback=icourses_parser.icourses_cn_url_parser) # Why not using VideoExtractor: This site needs specical download method @@ -40,9 +43,7 @@ class ICousesExactor(object): return def icourses_playlist_download(self, **kwargs): - import random - from time import sleep - html = get_content(url) + html = get_content(self.url) page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)' video_js_number = r'changeforvideo\((.*?)\)' fs_flag = r'' @@ -59,7 +60,7 @@ class ICousesExactor(object): sleep(random.Random().randint(0, 5)) # Prevent from blockage icourses_download(video_url, **kwargs) - def icourses_cn_url_parser(self, **kwargs): + def icourses_cn_url_parser(self, received=0, **kwargs): PLAYER_BASE_VER = '150606-1' ENCRYPT_MOD_VER = '151020' ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this... 
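(How this meshes with PATCH 12's url_save_chunked, whose dyn_callback hook
hands back the running byte offset after every chunk_size bytes -- here
15000000, per the call above -- and expects a freshly signed URL in return.
The hunk below turns that offset into a 'seek' request. A sketch, with a
made-up page_url:)

    parser = ICousesExactor(url=page_url)
    first = parser.icourses_cn_url_parser()                     # 'play', start=0
    resumed = parser.icourses_cn_url_parser(received=15000000)  # 'seek' resume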
@@ -93,9 +94,14 @@ class ICousesExactor(object): logging.debug('The result was {}'.format(xml_obj.get('status'))) if xml_obj.get('status') != 'success': raise ValueError('Server returned error!') - common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play', + if received: + play_type = 'seek' + else: + play_type = 'play' + received -= 1 + common_args = {'lv': PLAYER_BASE_VER, 'ls': play_type, 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'), - 'start': 0} + 'start': received + 1} media_host = xml_obj.find(".//*[@name='host']").text media_url = media_host + xml_obj.find(".//*[@name='url']").text # This is what they called `SSLModule`... But obviously, just a kind of From ac33461c88344d86f74b69572f2f27d03fd708b5 Mon Sep 17 00:00:00 2001 From: Cheng Gu Date: Thu, 27 Oct 2016 17:44:02 +0800 Subject: [PATCH 14/29] fix(huomao): adapt to new url format --- src/you_get/extractors/huomaotv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/huomaotv.py b/src/you_get/extractors/huomaotv.py index 4852ff06..6e98c800 100644 --- a/src/you_get/extractors/huomaotv.py +++ b/src/you_get/extractors/huomaotv.py @@ -6,7 +6,7 @@ from ..common import * def get_mobile_room_url(room_id): - return 'http://www.huomao.com/mobile/mob_live?cid=%s' % room_id + return 'http://www.huomao.com/mobile/mob_live/%s' % room_id def get_m3u8_url(stream_id): From 0f3fe97e9caedf976286193aff5dddf430d80962 Mon Sep 17 00:00:00 2001 From: Cheng Gu Date: Thu, 27 Oct 2016 17:44:54 +0800 Subject: [PATCH 15/29] update: add huomao.com --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b994ebd1..182fc12a 100644 --- a/README.md +++ b/README.md @@ -407,6 +407,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 花瓣 | | |✓| | | Naver
네이버 | |✓| | | | 芒果TV | |✓| | | +| 火猫TV | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. From feffcb656ad2c33b17fb2e20598f8137fc69789c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 30 Oct 2016 00:24:31 +0200 Subject: [PATCH 16/29] [processor.ffmpeg] fix params in ffmpeg_download_stream --- src/you_get/processor/ffmpeg.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 1c0ba1a3..c6da97f7 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -212,15 +212,6 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): if not (output_dir == '.'): output = output_dir + '/' + output - ffmpeg_params = [] - #should these exist... - if params is not None: - if len(params) > 0: - for k, v in params: - ffmpeg_params.append(k) - ffmpeg_params.append(v) - - print('Downloading streaming content with FFmpeg, press q to stop recording...') ffmpeg_params = [FFMPEG] + ['-y', '-re', '-i'] ffmpeg_params.append(files) #not the same here!!!! @@ -230,6 +221,12 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): else: ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc'] + if params is not None: + if len(params) > 0: + for k, v in params: + ffmpeg_params.append(k) + ffmpeg_params.append(v) + ffmpeg_params.append(output) print(' '.join(ffmpeg_params)) From 4b55884e86df68c56ae9fce85293f9b757e97576 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 30 Oct 2016 00:26:25 +0200 Subject: [PATCH 17/29] [dailymotion] use ffmpeg_download_stream, fix #1466 --- src/you_get/extractors/dailymotion.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py index 8b701cd1..2e96c160 100644 --- a/src/you_get/extractors/dailymotion.py +++ b/src/you_get/extractors/dailymotion.py @@ -4,6 +4,11 @@ __all__ = ['dailymotion_download'] from ..common import * +def extract_m3u(url): + content = get_content(url) + m3u_url = re.findall(r'http://.*', content)[0] + return match1(m3u_url, r'([^#]+)') + def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): """Downloads Dailymotion videos by URL. 
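
    (Hypothetical call; the video id is a placeholder:)

        dailymotion_download('http://www.dailymotion.com/video/x0xxxxx',
                             output_dir='.', info_only=True)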
""" @@ -13,7 +18,7 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \ match1(html, r'"title"\s*:\s*"([^"]+)"') - for quality in ['720','480','380','240','auto']: + for quality in ['1080','720','480','380','240','auto']: try: real_url = info[quality][0]["url"] if real_url: @@ -21,11 +26,12 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, except KeyError: pass - type, ext, size = url_info(real_url) + m3u_url = extract_m3u(real_url) + mime, ext, size = 'video/mp4', 'mp4', 0 - print_info(site_info, title, type, size) + print_info(site_info, title, mime, size) if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) + download_url_ffmpeg(m3u_url, title, ext, output_dir=output_dir, merge=merge) site_info = "Dailymotion.com" download = dailymotion_download From a4f4fb362616862cc283b05122e74be346f1a309 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 30 Oct 2016 16:16:04 +0100 Subject: [PATCH 18/29] Revert "fix for #1405" (fix #1485) This reverts commit 38ba0dbe48ecac4b7a354e4cf5766cf9415fb3c9. --- src/you_get/extractors/youku.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 1fb09e8c..853a75ba 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -314,9 +314,6 @@ class Youku(VideoExtractor): q = q ) ksegs += [i['server'] for i in json.loads(get_content(u))] - - if (parse_host(ksegs[len(ksegs)-1])[0] == "vali.cp31.ott.cibntv.net"): - ksegs.pop(len(ksegs)-1) except error.HTTPError as e: # Use fallback stream data in case of HTTP 404 log.e('[Error] ' + str(e)) From e8514d1370bc748946940c7c2f757db5c9cf42c8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 3 Nov 2016 01:44:04 +0100 Subject: [PATCH 19/29] version 0.4.575 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 6d91656c..6d4f6c4f 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.555' +__version__ = '0.4.575' From 391ca5643a355c310db786e467c6929fd5dde53f Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Wed, 2 Nov 2016 20:44:40 -0400 Subject: [PATCH 20/29] [embed] correct tudou pattern Hyphen-minus (-) is a valid character in Tudou's video ID. It's even present in the second pattern of tudou_embed_patterns, just not the first. 
--- src/you_get/extractors/embed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index a177e663..fc4015c4 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -25,7 +25,7 @@ youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)', """ http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 """ -tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_]+)\&', +tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&', 'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf' ] From 2b0fe3443f844690305caa0a468d1b744c72ced5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 3 Nov 2016 17:03:01 +0100 Subject: [PATCH 21/29] [test] remove test_vimeo --- tests/test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test.py b/tests/test.py index 638206af..0fa2979a 100644 --- a/tests/test.py +++ b/tests/test.py @@ -21,9 +21,6 @@ class YouGetTests(unittest.TestCase): def test_mixcloud(self): mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True) - def test_vimeo(self): - vimeo.download("http://vimeo.com/56810854", info_only=True) - def test_youtube(self): youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True) youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True) From bc590cbd62ca4350598551e41910c719864f0c36 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 3 Nov 2016 21:32:13 +0100 Subject: [PATCH 22/29] [douban] add support: movie.douban.com --- README.md | 4 ++-- src/you_get/extractors/douban.py | 23 +++++++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 182fc12a..40a26803 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ $ you-get https://github.com/soimort/you-get/archive/master.zip or use [chocolatey package manager](https://chocolatey.org): ``` -> choco upgrade you-get +> choco upgrade you-get ``` In order to get the latest ```develop``` branch without messing up the PIP, you can try: @@ -373,7 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 爆米花网 | |✓| | | | **bilibili
<br/>哔哩哔哩** | |✓| | |
| Dilidili | |✓| | |
-| 豆瓣 | | | |✓|
+| 豆瓣 | |✓| |✓|
| 斗鱼 | |✓| | |
| Panda<br/>
熊猫 | |✓| | | | 凤凰视频 | |✓| | | diff --git a/src/you_get/extractors/douban.py b/src/you_get/extractors/douban.py index 187e99c0..1a4a67d1 100644 --- a/src/you_get/extractors/douban.py +++ b/src/you_get/extractors/douban.py @@ -7,12 +7,23 @@ from ..common import * def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) - if 'subject' in url: + + if re.match(r'https?://movie', url): + title = match1(html, 'name="description" content="([^"]+)') + tid = match1(url, 'trailer/(\d+)') + real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid + type, ext, size = url_info(real_url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([real_url], title, ext, size, output_dir, merge = merge) + + elif 'subject' in url: titles = re.findall(r'data-title="([^"]*)">', html) song_id = re.findall(r'
  • Date: Thu, 3 Nov 2016 22:03:56 +0100 Subject: [PATCH 23/29] [bilibili] fix support for bangumi --- src/you_get/extractors/bilibili.py | 108 +++++++++++++++-------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index c18290b8..122dea0b 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -119,66 +119,70 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) - if re.match(r'https?://bangumi\.bilibili\.com/', url): - # quick hack for bangumi URLs - url = r1(r'"([^"]+)" class="v-av-link"', html) - html = get_content(url) - title = r1_of([r'', r']*>\s*([^<>]+)\s*
  • '], html) if title: title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', - r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) - assert flashvars - flashvars = flashvars.replace(': ', '=') - t, cid = flashvars.split('=', 1) - cid = cid.split('&')[0] - if t == 'cid': - if re.match(r'https?://live\.bilibili\.com/', url): - title = r1(r'\s*([^<>]+)\s*', html) - bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) + if re.match(r'https?://bangumi\.bilibili\.com/', url): + # quick hack for bangumi URLs + episode_id = r1(r'data-current-episode-id="(\d+)"', html) + cont = post_content('http://bangumi.bilibili.com/web_api/get_source', + post_data={'episode_id': episode_id}) + cid = json.loads(cont)['result']['cid'] + bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only) - else: - # multi-P - cids = [] - pages = re.findall('', html) - for i, page in enumerate(pages): - html = get_html("http://www.bilibili.com%s" % page) - flashvars = r1_of([r'(cid=\d+)', - r'flashvars="([^"]+)"', - r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) - if flashvars: - t, cid = flashvars.split('=', 1) - cids.append(cid.split('&')[0]) - if url.endswith(page): - cids = [cid.split('&')[0]] - titles = [titles[i]] - break - - # no multi-P - if not pages: - cids = [cid] - titles = [r1(r'', html) or title] - - for i in range(len(cids)): - bilibili_download_by_cid(cids[i], - titles[i], - output_dir=output_dir, - merge=merge, - info_only=info_only) - - elif t == 'vid': - sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) - elif t == 'ykid': - youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) - elif t == 'uid': - tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) else: - raise NotImplementedError(flashvars) + flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', + r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + assert flashvars + flashvars = flashvars.replace(': ', '=') + t, cid = flashvars.split('=', 1) + cid = cid.split('&')[0] + if t == 'cid': + if re.match(r'https?://live\.bilibili\.com/', url): + title = r1(r'\s*([^<>]+)\s*', html) + bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) + + else: + # multi-P + cids = [] + pages = re.findall('', html) + for i, page in enumerate(pages): + html = get_html("http://www.bilibili.com%s" % page) + flashvars = r1_of([r'(cid=\d+)', + r'flashvars="([^"]+)"', + r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + if flashvars: + t, cid = flashvars.split('=', 1) + cids.append(cid.split('&')[0]) + if url.endswith(page): + cids = [cid.split('&')[0]] + titles = [titles[i]] + break + + # no multi-P + if not pages: + cids = [cid] + titles = [r1(r'', html) or title] + + for i in range(len(cids)): + bilibili_download_by_cid(cids[i], + titles[i], + output_dir=output_dir, + merge=merge, + info_only=info_only) + + elif t == 'vid': + sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + elif t == 'ykid': + youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + elif t == 'uid': + tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, 
From 51dd7ad8e6b757687a4c06af7b6b3fb3dfa5f5b1 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Wed, 9 Nov 2016 17:13:02 +0100
Subject: [PATCH 25/29] [youtube] use url_encoded_fmt_stream_map from video
 page, fix #1502

---
 src/you_get/extractors/youtube.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index 33e3923e..64af5c14 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -155,6 +155,8 @@ class YouTube(VideoExtractor):
             try:
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
                 self.html5player = 'https:' + ytplayer_config['assets']['js']
+                # Workaround: get_video_info returns bad s. Why?
+                stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
             except:
                 self.html5player = None
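
Background for PATCH 25: url_encoded_fmt_stream_map is a comma-separated list in which every element is itself a URL-encoded query string (itag, url, the obfuscated signature field s, and so on, as the fields were laid out at the time). A hedged sketch of unpacking the entries after the split(',') shown in the patch:

    from urllib.parse import parse_qs

    def parse_stream_map(stream_map):
        # one dict per stream variant; parse_qs yields lists, keep the first value
        return [{k: v[0] for k, v in parse_qs(entry).items()}
                for entry in stream_map.split(',')]
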
%s" % (playlist_prefix, song['position'], song['name']) songNet = 'p' + song['mp3Url'].split('/')[2][1:] if 'hMusic' in song and song['hMusic'] != None: From 51dd7ad8e6b757687a4c06af7b6b3fb3dfa5f5b1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 9 Nov 2016 17:13:02 +0100 Subject: [PATCH 25/29] [youtube] use url_encoded_fmt_stream_map from video page, fix #1502 --- src/you_get/extractors/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 33e3923e..64af5c14 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -155,6 +155,8 @@ class YouTube(VideoExtractor): try: ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) self.html5player = 'https:' + ytplayer_config['assets']['js'] + # Workaround: get_video_info returns bad s. Why? + stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') except: self.html5player = None From 78ba20266c6b5e1fef1398af60ea8361bf57fff0 Mon Sep 17 00:00:00 2001 From: moyo Date: Sun, 13 Nov 2016 17:41:00 +0800 Subject: [PATCH 26/29] 1. Change container from FLV to TS 2. Fix video url matcher 3. Use m3u8 ext-info for fast size calculate 4. Use m3u8 url for video playing --- src/you_get/extractors/mgtv.py | 74 ++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/src/you_get/extractors/mgtv.py b/src/you_get/extractors/mgtv.py index aeb42490..3ce62efe 100644 --- a/src/you_get/extractors/mgtv.py +++ b/src/you_get/extractors/mgtv.py @@ -12,11 +12,11 @@ import re class MGTV(VideoExtractor): name = "芒果 (MGTV)" - # Last updated: 2015-11-24 + # Last updated: 2016-11-13 stream_types = [ - {'id': 'hd', 'container': 'flv', 'video_profile': '超清'}, - {'id': 'sd', 'container': 'flv', 'video_profile': '高清'}, - {'id': 'ld', 'container': 'flv', 'video_profile': '标清'}, + {'id': 'hd', 'container': 'ts', 'video_profile': '超清'}, + {'id': 'sd', 'container': 'ts', 'video_profile': '高清'}, + {'id': 'ld', 'container': 'ts', 'video_profile': '标清'}, ] id_dic = {i['video_profile']:(i['id']) for i in stream_types} @@ -27,7 +27,7 @@ class MGTV(VideoExtractor): def get_vid_from_url(url): """Extracts video ID from URL. """ - return match1(url, 'http://www.mgtv.com/v/\d/\d+/\w+/(\d+).html') + return match1(url, 'http://www.mgtv.com/b/\d+/(\d+).html') #---------------------------------------------------------------------- @staticmethod @@ -44,10 +44,15 @@ class MGTV(VideoExtractor): content = get_content(content['info']) #get the REAL M3U url, maybe to be changed later? 
From 65713cae2cf1c122be72c2d6fdaf854b35260562 Mon Sep 17 00:00:00 2001
From: L
Date: Mon, 14 Nov 2016 21:49:13 +0800
Subject: [PATCH 27/29] update yixia_download url match rule

resolved #1346
---
 src/you_get/extractors/yixia.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py
index ca5c4bd6..7d5ba290 100644
--- a/src/you_get/extractors/yixia.py
+++ b/src/you_get/extractors/yixia.py
@@ -51,11 +51,11 @@ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwa
         yixia_download_by_scid = yixia_miaopai_download_by_scid
         site_info = "Yixia Miaopai"
 
-        if re.match(r'http://www.miaopai.com/show/channel/\w+', url): #PC
+        if re.match(r'http://www.miaopai.com/show/channel/.+', url): #PC
             scid = match1(url, r'http://www.miaopai.com/show/channel/(.+)\.htm')
-        elif re.match(r'http://www.miaopai.com/show/\w+', url): #PC
+        elif re.match(r'http://www.miaopai.com/show/.+', url): #PC
             scid = match1(url, r'http://www.miaopai.com/show/(.+)\.htm')
-        elif re.match(r'http://m.miaopai.com/show/channel/\w+', url): #Mobile
+        elif re.match(r'http://m.miaopai.com/show/channel/.+', url): #Mobile
             scid = match1(url, r'http://m.miaopai.com/show/channel/(.+)\.htm')
 
     elif 'xiaokaxiu.com' in hostname: #Xiaokaxiu
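
Why PATCH 27 loosens \w+ to .+: \w matches only word characters, so a scid beginning with anything else (for instance an id starting with '~', as in the hypothetical below) made the old, unanchored re.match reject a URL that the capturing match1 pattern would have handled fine. A demonstration:

    import re

    url = 'http://www.miaopai.com/show/~8CKnarGPZWxYqUsAVRsWiA__.htm'  # hypothetical scid
    print(bool(re.match(r'http://www.miaopai.com/show/\w+', url)))  # False: '~' is not \w
    print(bool(re.match(r'http://www.miaopai.com/show/.+', url)))   # True
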
From a7635e96a5e20cc4025fbcb236254e7a69c6556c Mon Sep 17 00:00:00 2001
From: Zhang Cheng
Date: Thu, 17 Nov 2016 11:18:01 +0800
Subject: [PATCH 28/29] [mgtv] add bsf:a aac_adtstoasc to ffmpeg args, fix
 #1458.

---
 src/you_get/processor/ffmpeg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
index 1c0ba1a3..dcc8e1c8 100644
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -125,7 +125,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
 
     params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
     params.append(output + '.txt')
-    params += ['-c', 'copy', output]
+    params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
 
     subprocess.check_call(params)
     os.remove(output + '.txt')

From 250672f42d475eba1b7a69b48683cf0d0576698a Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Sat, 19 Nov 2016 20:47:18 +0100
Subject: [PATCH 29/29] version 0.4.595

---
 src/you_get/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/you_get/version.py b/src/you_get/version.py
index 6d4f6c4f..28919906 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.575'
+__version__ = '0.4.595'
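
A closing note on PATCH 28: MPEG-TS segments carry AAC audio in ADTS framing, while the MP4 container expects raw AAC with an AudioSpecificConfig header, so a stream-copied concat without the aac_adtstoasc bitstream filter yields broken audio (ffmpeg's "Malformed AAC bitstream" complaint, most likely the failure behind #1458). The patched helper ends up running roughly the command below; the sketch reproduces it standalone, with list.txt and out.mp4 as placeholder names:

    import subprocess

    # ffmpeg -f concat -safe -1 -y -i list.txt -c copy -bsf:a aac_adtstoasc out.mp4
    subprocess.check_call(['ffmpeg', '-f', 'concat', '-safe', '-1', '-y',
                           '-i', 'list.txt',
                           '-c', 'copy', '-bsf:a', 'aac_adtstoasc', 'out.mp4'])
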