Merge pull request #1 from soimort/master

Master
commit cf4c39ee0a by redstoneleo, 2017-03-09 21:03:21 +08:00 (committed by GitHub)
22 changed files with 122 additions and 337 deletions

View File

@@ -5,6 +5,7 @@ python:
 - "3.3"
 - "3.4"
 - "3.5"
+- "3.6"
 - "nightly"
 - "pypy3"
 script: make test

View File

@@ -1,7 +1,7 @@
 ==============================================
 This is a copy of the MIT license.
 ==============================================
-Copyright (C) 2012, 2013, 2014, 2015, 2016 Mort Yao <mort.yao@gmail.com>
+Copyright (C) 2012-2017 Mort Yao <mort.yao@gmail.com>
 Copyright (C) 2012 Boyu Guo <iambus@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of

View File

@@ -347,7 +347,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | Metacafe | <http://www.metacafe.com/> |✓| | |
 | Magisto | <http://www.magisto.com/> |✓| | |
 | Khan Academy | <https://www.khanacademy.org/> |✓| | |
-| JPopsuki TV | <http://www.jpopsuki.tv/> |✓| | |
 | Internet Archive | <https://archive.org/> |✓| | |
 | **Instagram** | <https://instagram.com/> |✓|✓| |
 | InfoQ | <http://www.infoq.com/presentations/> |✓| | |
@@ -392,11 +391,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 齐鲁网 | <http://v.iqilu.com/> |✓| | |
 | QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | |
 | 企鹅直播 | <http://live.qq.com/> |✓| | |
-| 阡陌视频 | <http://qianmo.com/> |✓| | |
-| THVideo | <http://thvideo.tv/> |✓| | |
 | Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | |
 | Sohu<br/>搜狐视频 | <http://tv.sohu.com/> |✓| | |
-| 天天动听 | <http://www.dongting.com/> | | |✓|
 | **Tudou<br/>土豆** | <http://www.tudou.com/> |✓| | |
 | 虾米 | <http://www.xiami.com/> | | |✓|
 | 阳光卫视 | <http://www.isuntv.com/> |✓| | |

View File

@@ -15,7 +15,6 @@ SITES = {
     'cbs' : 'cbs',
     'dailymotion' : 'dailymotion',
     'dilidili' : 'dilidili',
-    'dongting' : 'dongting',
     'douban' : 'douban',
     'douyu' : 'douyutv',
     'ehow' : 'ehow',
@@ -40,7 +39,6 @@ SITES = {
     'iqiyi' : 'iqiyi',
     'isuntv' : 'suntv',
     'joy' : 'joy',
-    'jpopsuki' : 'jpopsuki',
     'kankanews' : 'bilibili',
     'khanacademy' : 'khan',
     'ku6' : 'ku6',
@@ -63,7 +61,6 @@ SITES = {
     'pinterest' : 'pinterest',
     'pixnet' : 'pixnet',
     'pptv' : 'pptv',
-    'qianmo' : 'qianmo',
     'qq' : 'qq',
     'quanmin' : 'quanmin',
     'showroom-live' : 'showroom',
@@ -73,7 +70,6 @@ SITES = {
     'soundcloud' : 'soundcloud',
     'ted' : 'ted',
     'theplatform' : 'theplatform',
-    'thvideo' : 'thvideo',
     'tucao' : 'tucao',
     'tudou' : 'tudou',
     'tumblr' : 'tumblr',
@@ -131,7 +127,7 @@ fake_headers = {
     'Accept-Charset': 'UTF-8,*;q=0.5',
     'Accept-Encoding': 'gzip,deflate,sdch',
     'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'
 }
 
 if sys.stdout.isatty():
@@ -298,6 +294,13 @@ def get_location(url):
     # not to do that
     return response.geturl()
 
+def urlopen_with_retry(*args, **kwargs):
+    for i in range(10):
+        try:
+            return request.urlopen(*args, **kwargs)
+        except socket.timeout:
+            logging.debug('request attempt %s timeout' % str(i + 1))
+
 def get_content(url, headers={}, decoded=True):
     """Gets the content of a URL via sending a HTTP GET request.
@@ -317,13 +320,7 @@ def get_content(url, headers={}, decoded=True):
         cookies.add_cookie_header(req)
         req.headers.update(req.unredirected_hdrs)
 
-    for i in range(10):
-        try:
-            response = request.urlopen(req)
-            break
-        except socket.timeout:
-            logging.debug('request attempt %s timeout' % str(i + 1))
+    response = urlopen_with_retry(req)
 
     data = response.read()
 
     # Handle HTTP compression for gzip and deflate (zlib)
@@ -362,7 +359,7 @@ def post_content(url, headers={}, post_data={}, decoded=True):
         cookies.add_cookie_header(req)
         req.headers.update(req.unredirected_hdrs)
 
     post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
-    response = request.urlopen(req, data = post_data_enc)
+    response = urlopen_with_retry(req, data=post_data_enc)
     data = response.read()
 
     # Handle HTTP compression for gzip and deflate (zlib)
@@ -384,11 +381,11 @@ def post_content(url, headers={}, post_data={}, decoded=True):
 
 def url_size(url, faker = False, headers = {}):
     if faker:
-        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=fake_headers))
     elif headers:
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
     else:
-        response = request.urlopen(url)
+        response = urlopen_with_retry(url)
 
     size = response.headers['content-length']
     return int(size) if size!=None else float('inf')
@@ -398,20 +395,20 @@ def urls_size(urls, faker = False, headers = {}):
 
 def get_head(url, headers = {}, get_method = 'HEAD'):
     if headers:
-        req = request.Request(url, headers = headers)
+        req = request.Request(url, headers=headers)
     else:
         req = request.Request(url)
-    req.get_method = lambda : get_method
-    res = request.urlopen(req)
+    req.get_method = lambda: get_method
+    res = urlopen_with_retry(req)
     return dict(res.headers)
 
 def url_info(url, faker = False, headers = {}):
     if faker:
-        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=fake_headers))
     elif headers:
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
     else:
-        response = request.urlopen(request.Request(url))
+        response = urlopen_with_retry(request.Request(url))
 
     headers = response.headers
@@ -460,11 +457,11 @@ def url_locations(urls, faker = False, headers = {}):
     locations = []
     for url in urls:
         if faker:
-            response = request.urlopen(request.Request(url, headers = fake_headers), None)
+            response = urlopen_with_retry(request.Request(url, headers=fake_headers))
         elif headers:
-            response = request.urlopen(request.Request(url, headers = headers), None)
+            response = urlopen_with_retry(request.Request(url, headers=headers))
         else:
-            response = request.urlopen(request.Request(url))
+            response = urlopen_with_retry(request.Request(url))
         locations.append(response.url)
     return locations
@@ -514,10 +511,10 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
         if refer:
             headers['Referer'] = refer
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
 
         try:
             range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
-            end_length = end = int(response.headers['content-range'][6:].split('/')[1])
+            end_length = int(response.headers['content-range'][6:].split('/')[1])
             range_length = end_length - range_start
         except:
             content_length = response.headers['content-length']
@@ -537,7 +534,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
                     break
                 else:  # Unexpected termination. Retry request
                     headers['Range'] = 'bytes=' + str(received) + '-'
-                    response = request.urlopen(request.Request(url, headers = headers), None)
+                    response = urlopen_with_retry(request.Request(url, headers=headers))
                 output.write(buffer)
                 received += len(buffer)
                 if bar:
@@ -597,7 +594,7 @@ def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore
     if refer:
         headers['Referer'] = refer
-    response = request.urlopen(request.Request(url, headers=headers), None)
+    response = urlopen_with_retry(request.Request(url, headers=headers))
 
     with open(temp_filepath, open_mode) as output:
         this_chunk = received
@@ -610,7 +607,7 @@ def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore
             if chunk_size and (received - this_chunk) >= chunk_size:
                 url = dyn_callback(received)
                 this_chunk = received
-                response = request.urlopen(request.Request(url, headers=headers), None)
+                response = urlopen_with_retry(request.Request(url, headers=headers))
 
             if bar:
                 bar.update_received(len(buffer))
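The new urlopen_with_retry() consolidates the inline timeout-retry loop that get_content used to carry, and every request.urlopen call site in this file now goes through it. As written it retries up to ten times on socket.timeout and, if all attempts fail, falls off the end of the loop and returns None, so the caller only notices later at response.read(). A sketch of a variant that surfaces the failure instead (illustrative only, not part of this commit):

    import logging
    import socket
    from urllib import request

    def urlopen_with_retry(*args, **kwargs):
        attempts = 10
        for i in range(attempts):
            try:
                return request.urlopen(*args, **kwargs)
            except socket.timeout:
                logging.debug('request attempt %s timeout' % str(i + 1))
                if i + 1 == attempts:
                    raise  # re-raise after the last attempt instead of returning None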

View File

@@ -33,7 +33,6 @@ from .interest import *
 from .iqilu import *
 from .iqiyi import *
 from .joy import *
-from .jpopsuki import *
 from .ku6 import *
 from .kugou import *
 from .kuwo import *
@@ -55,7 +54,6 @@ from .panda import *
 from .pinterest import *
 from .pixnet import *
 from .pptv import *
-from .qianmo import *
 from .qie import *
 from .qq import *
 from .showroom import *
@@ -64,7 +62,6 @@ from .sohu import *
 from .soundcloud import *
 from .suntv import *
 from .theplatform import *
-from .thvideo import *
 from .tucao import *
 from .tudou import *
 from .tumblr import *

View File

@@ -77,6 +77,8 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     title = unescape_html(title)
     title = escape_file_path(title)
     assert title
+    if match1(url, r'_(\d+)$'): # current P
+        title = title + " " + r1(r'active">([^<]*)', html)
 
     vid = r1('data-vid="(\d+)"', html)
     up = r1('data-name="([^"]+)"', html)
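The added check treats a trailing _N in an AcFun URL as the currently viewed part (P) and appends that part's name, taken from the page's active"> element, to the title. With a hypothetical URL:

    import re

    url = 'http://www.acfun.cn/v/ac1234567_2'  # hypothetical multi-part URL

    part = re.search(r'_(\d+)$', url)
    if part:
        print('current P:', part.group(1))  # '2': the active part's name is appended to the title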

View File

@@ -168,10 +168,14 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     if not pages:
         cids = [cid]
         titles = [r1(r'<option value=.* selected>\s*([^<>]+)\s*</option>', html) or title]
 
     for i in range(len(cids)):
+        completeTitle=None
+        if (title == titles[i]):
+            completeTitle=title
+        else:
+            completeTitle=title+"-"+titles[i] #Build Better Title
         bilibili_download_by_cid(cids[i],
-                                 titles[i],
+                                 completeTitle,
                                  output_dir=output_dir,
                                  merge=merge,
                                  info_only=info_only)
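The completeTitle logic gives every part of a multi-page video a name of the form title-parttitle, while a single-part video keeps the plain title, so parts no longer overwrite each other on disk. A self-contained sketch of the same rule with made-up titles:

    # Mirrors the completeTitle branch added above.
    title = 'Some Series'
    titles = ['Some Series', 'Episode 2']

    for i in range(len(titles)):
        complete_title = title if title == titles[i] else title + '-' + titles[i]
        print(complete_title)  # 'Some Series', then 'Some Series-Episode 2'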

View File

@@ -1,55 +0,0 @@
# -*- coding: utf-8 -*-
__all__ = ['dongting_download']
from ..common import *
_unit_prefixes = 'bkmg'
def parse_size(size):
m = re.match(r'([\d.]+)(.(?:i?B)?)', size, re.I)
if m:
return int(float(m.group(1)) * 1024 **
_unit_prefixes.index(m.group(2).lower()))
else:
return 0
def dongting_download_lyric(lrc_url, file_name, output_dir):
j = get_html(lrc_url)
info = json.loads(j)
lrc = j['data']['lrc']
filename = get_filename(file_name)
with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x:
x.write(lrc)
def dongting_download_song(sid, output_dir = '.', merge = True, info_only = False):
j = get_html('http://ting.hotchanson.com/detail.do?neid=%s&size=0' % sid)
info = json.loads(j)
song_title = info['data']['songName']
album_name = info['data']['albumName']
artist = info['data']['singerName']
ext = 'mp3'
size = parse_size(info['data']['itemList'][-1]['size'])
url = info['data']['itemList'][-1]['downUrl']
print_info(site_info, song_title, ext, size)
if not info_only:
file_name = "%s - %s - %s" % (song_title, album_name, artist)
download_urls([url], file_name, ext, size, output_dir, merge = merge)
lrc_url = ('http://lp.music.ttpod.com/lrc/down?'
'lrcid=&artist=%s&title=%s') % (
parse.quote(artist), parse.quote(song_title))
try:
dongting_download_lyric(lrc_url, file_name, output_dir)
except:
pass
def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
if re.match('http://www.dongting.com/\?song_id=\d+', url):
id = r1(r'http://www.dongting.com/\?song_id=(\d+)', url)
dongting_download_song(id, output_dir, merge, info_only)
site_info = "Dongting.com"
download = dongting_download
download_playlist = playlist_not_supported("dongting")

View File

@@ -11,11 +11,11 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     title = r1(r'<title id="pageTitle">(.+)</title>', html)
 
     sd_urls = list(set([
         unicodize(str.replace(i, '\\/', '/'))
-        for i in re.findall(r'"sd_src_no_ratelimit":"([^"]*)"', html)
+        for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html)
     ]))
     hd_urls = list(set([
         unicodize(str.replace(i, '\\/', '/'))
-        for i in re.findall(r'"hd_src_no_ratelimit":"([^"]*)"', html)
+        for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html)
     ]))
     urls = hd_urls if hd_urls else sd_urls

View File

@@ -51,7 +51,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
         # attempt to extract images first
         # TBD: posts with > 4 images
         # TBD: album links
-        html = get_html(parse.unquote(url))
+        html = get_html(parse.unquote(url), faker=True)
         real_urls = []
         for src in re.findall(r'src="([^"]+)"[^>]*itemprop="image"', html):
             t = src.split('/')
@@ -65,8 +65,8 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
             title = post_date + "_" + post_id
 
         try:
-            url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
-            html = get_html(url)
+            url = "https://plus.google.com/" + r1(r'(photos/\d+/albums/\d+/\d+)\?authkey', html)
+            html = get_html(url, faker=True)
             temp = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
             temp = sorted(temp, key = lambda x : fmt_level[x[0]])
             urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]]
@@ -77,7 +77,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
             post_author = r1(r'/\+([^/]+)/posts', post_url)
             if post_author:
                 post_url = "https://plus.google.com/+%s/posts/%s" % (parse.quote(post_author), r1(r'posts/(.+)', post_url))
-            post_html = get_html(post_url)
+            post_html = get_html(post_url, faker=True)
             title = r1(r'<title[^>]*>([^<\n]+)', post_html)
 
             if title is None:
@@ -98,7 +98,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
     elif service in ['docs', 'drive'] : # Google Docs
 
-        html = get_html(url)
+        html = get_html(url, faker=True)
 
         title = r1(r'"title":"([^"]*)"', html) or r1(r'<meta itemprop="name" content="([^"]*)"', html)
         if len(title.split('.')) > 1:

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env python
__all__ = ['jpopsuki_download']
from ..common import *
def jpopsuki_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url, faker=True)
title = r1(r'<meta name="title" content="([^"]*)"', html)
if title.endswith(' - JPopsuki TV'):
title = title[:-14]
url = "http://jpopsuki.tv%s" % r1(r'<source src="([^"]*)"', html)
type, ext, size = url_info(url, faker=True)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, ext, size, output_dir, merge=merge, faker=True)
site_info = "JPopsuki.tv"
download = jpopsuki_download
download_playlist = playlist_not_supported('jpopsuki')

View File

@@ -22,9 +22,9 @@ def netease_hymn():
     """
 
 def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    rid = match1(url, r'id=(.*)')
+    rid = match1(url, r'\Wid=(.*)')
     if rid is None:
-        rid = match1(url, r'/(\d+)/?$')
+        rid = match1(url, r'/(\d+)/?')
     if "album" in url:
         j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
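Requiring a non-word character in front of id= keeps the first pattern anchored to a real ?id=/&id= parameter instead of the tail of a longer parameter name, and dropping the $ from the fallback lets it match a numeric id that is not at the very end of the URL. A quick check with a made-up URL:

    import re

    url = 'http://music.163.com/#/playlist?uid=28012031'  # hypothetical URL

    print(re.search(r'id=(.*)', url).group(1))  # '28012031': the old pattern also bites on 'uid='
    print(re.search(r'\Wid=(.*)', url))         # None: the new pattern insists on ?id= / &id=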

View File

@@ -8,21 +8,27 @@ import time
 def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     roomid = url[url.rfind('/')+1:]
-    json_request_url = 'http://www.panda.tv/api_room?roomid={}&pub_key=&_={}'.format(roomid, int(time.time()))
+    json_request_url ="http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}".format(roomid, int(time.time()))
     content = get_html(json_request_url)
-    errno = json.loads(content)['errno']
-    errmsg = json.loads(content)['errmsg']
+    api_json = json.loads(content)
+    errno = api_json["errno"]
+    errmsg = api_json["errmsg"]
     if errno:
         raise ValueError("Errno : {}, Errmsg : {}".format(errno, errmsg))
 
-    data = json.loads(content)['data']
-    title = data.get('roominfo')['name']
-    room_key = data.get('videoinfo')['room_key']
-    plflag = data.get('videoinfo')['plflag'].split('_')
-    status = data.get('videoinfo')['status']
+    data = api_json["data"]
+    title = data["roominfo"]["name"]
+    room_key = data["videoinfo"]["room_key"]
+    plflag = data["videoinfo"]["plflag"].split("_")
+    status = data["videoinfo"]["status"]
     if status is not "2":
         raise ValueError("The live stream is not online! (status:%s)" % status)
-    real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key)
+
+    data2 = json.loads(data["videoinfo"]["plflag_list"])
+    rid = data2["auth"]["rid"]
+    sign = data2["auth"]["sign"]
+    ts = data2["auth"]["time"]
+    real_url = "http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}".format(plflag[1], room_key, sign, ts, rid)
+
     print_info(site_info, title, 'flv', float('inf'))
     if not info_only:
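Room data now comes from api_room_v2, and the flv URL must carry sign, ts and rid taken from videoinfo['plflag_list'], which is itself a JSON string embedded in the JSON response and therefore needs a second json.loads. A stand-alone sketch with invented field values:

    import json

    # Shape mirrors the api_room_v2 response used above; the values are made up.
    videoinfo = {
        'room_key': 'abcdef',
        'plflag': '2_3',
        'plflag_list': json.dumps({'auth': {'rid': '123456', 'sign': 'd41d8cd9', 'time': '1489064601'}}),
    }

    auth = json.loads(videoinfo['plflag_list'])['auth']  # second decode: plflag_list is a JSON string
    real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}'.format(
        videoinfo['plflag'].split('_')[1], videoinfo['room_key'],
        auth['sign'], auth['time'], auth['rid'])
    print(real_url)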

View File

@@ -1,40 +0,0 @@
#!/usr/bin/env python
__all__ = ['qianmo_download']
from ..common import *
import urllib.error
import json
def qianmo_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
if re.match(r'http://qianmo.com/\w+', url):
html = get_html(url)
match = re.search(r'(.+?)var video =(.+?);', html)
if match:
video_info_json = json.loads(match.group(2))
title = video_info_json['title']
ext_video_id = video_info_json['ext_video_id']
html = get_content('http://v.qianmo.com/player/{ext_video_id}'.format(ext_video_id = ext_video_id))
c = json.loads(html)
url_list = []
for i in c['seg']: #Cannot do list comprehensions
for a in c['seg'][i]:
for b in a['url']:
url_list.append(b[0])
type_ = ''
size = 0
for url in url_list:
_, type_, temp = url_info(url)
size += temp
type, ext, size = url_info(url)
print_info(site_info, title, type_, size)
if not info_only:
download_urls(url_list, title, type_, total_size=None, output_dir=output_dir, merge=merge)
site_info = "qianmo"
download = qianmo_download
download_playlist = playlist_not_supported('qianmo')

View File

@@ -1,83 +0,0 @@
#!/usr/bin/env python
__all__ = ['thvideo_download']
from ..common import *
from xml.dom.minidom import parseString
#----------------------------------------------------------------------
def thvideo_cid_to_url(cid, p):
"""int,int->list
From Biligrab."""
interface_url = 'http://thvideo.tv/api/playurl.php?cid={cid}-{p}'.format(cid = cid, p = p)
data = get_content(interface_url)
rawurl = []
dom = parseString(data)
for node in dom.getElementsByTagName('durl'):
url = node.getElementsByTagName('url')[0]
rawurl.append(url.childNodes[0].data)
return rawurl
#----------------------------------------------------------------------
def th_video_get_title(url, p):
""""""
if re.match(r'http://thvideo.tv/v/\w+', url):
html = get_content(url)
title = match1(html, r'<meta property="og:title" content="([^"]*)"').strip()
video_list = match1(html, r'<li>cid=(.+)</li>').split('**')
if int(p) > 0: #not the 1st P or multi part
title = title + ' - ' + [i.split('=')[-1:][0].split('|')[1] for i in video_list][p]
return title
#----------------------------------------------------------------------
def thvideo_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
if re.match(r'http://thvideo.tv/v/\w+', url):
if 'p' in kwargs and kwargs['p']:
p = kwargs['p']
else:
p = int(match1(url, r'http://thvideo.tv/v/th\d+#(\d+)'))
p -= 1
if not p or p < 0:
p = 0
if 'title' in kwargs and kwargs['title']:
title = kwargs['title']
else:
title = th_video_get_title(url, p)
cid = match1(url, r'http://thvideo.tv/v/th(\d+)')
type_ = ''
size = 0
urls = thvideo_cid_to_url(cid, p)
for url in urls:
_, type_, temp = url_info(url)
size += temp
print_info(site_info, title, type_, size)
if not info_only:
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
#----------------------------------------------------------------------
def thvideo_download_playlist(url, output_dir = '.', merge = False, info_only = False, **kwargs):
""""""
if re.match(r'http://thvideo.tv/v/\w+', url):
html = get_content(url)
video_list = match1(html, r'<li>cid=(.+)</li>').split('**')
title_base = th_video_get_title(url, 0)
for p, v in video_list:
part_title = [i.split('=')[-1:][0].split('|')[1] for i in video_list][p]
title = title_base + part_title
thvideo_download(url, output_dir, merge,
info_only, p = p, title = title)
site_info = "THVideo"
download = thvideo_download
download_playlist = thvideo_download_playlist

View File

@@ -153,7 +153,8 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
         xiami_download_showcollect(id, output_dir, merge, info_only)
 
     if re.match('http://www.xiami.com/song/\d+', url):
-        id = r1(r'http://www.xiami.com/song/(\d+)', url)
+        html = get_html(url, faker=True)
+        id = r1(r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"', html)
         xiami_download_song(id, output_dir, merge, info_only)
 
     if re.match('http://www.xiami.com/song/detail/id/\d+', url):

View File

@@ -143,6 +143,9 @@ class Youku(VideoExtractor):
             })
         else:
             proxy_handler = request.ProxyHandler({})
+        if not request._opener:
+            opener = request.build_opener(proxy_handler)
+            request.install_opener(opener)
         for handler in (ssl_context, cookie_handler, proxy_handler):
             request._opener.add_handler(handler)
         request._opener.addheaders = [('Cookie','__ysuid={}'.format(time.time()))]
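The added guard matters because urllib.request keeps its module-level _opener set to None until install_opener() is called (or a first urlopen() builds the default one), so request._opener.add_handler(...) could fail on a fresh process. A minimal sketch of the same pattern in isolation:

    from urllib import request

    proxy_handler = request.ProxyHandler({})  # stand-in for the handlers built in the extractor

    if not request._opener:
        request.install_opener(request.build_opener(proxy_handler))

    request._opener.add_handler(proxy_handler)  # safe: an opener is guaranteed to exist now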

View File

@@ -52,7 +52,7 @@ class YouTube(VideoExtractor):
             return code
 
         js = js.replace('\n', ' ')
-        f1 = match1(js, r'\w+\.sig\|\|([$\w]+)\(\w+\.\w+\)')
+        f1 = match1(js, r'"signature",([\w]+)\(\w+\.\w+\)')
         f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
                 match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
         f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
@@ -165,7 +165,7 @@ class YouTube(VideoExtractor):
             video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
             try:
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
-                self.html5player = 'https:' + ytplayer_config['assets']['js']
+                self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                 # Workaround: get_video_info returns bad s. Why?
                 stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
             except:
@@ -177,7 +177,7 @@ class YouTube(VideoExtractor):
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
 
                 self.title = ytplayer_config['args']['title']
-                self.html5player = 'https:' + ytplayer_config['assets']['js']
+                self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                 stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
 
             elif video_info['status'] == ['fail']:
@@ -193,7 +193,7 @@ class YouTube(VideoExtractor):
                     # 150 Restricted from playback on certain sites
                     # Parse video page instead
                     self.title = ytplayer_config['args']['title']
-                    self.html5player = 'https:' + ytplayer_config['assets']['js']
+                    self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                     stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
                 else:
                     log.wtf('[Error] The uploader has not made this video available in your country.')
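Three of these hunks change the html5player prefix from https: to https://www.youtube.com, presumably because ytplayer_config['assets']['js'] now carries a root-relative path rather than a protocol-relative URL; the first hunk retargets the regex that locates the signature-decipher function in that player script. With hypothetical asset paths:

    # Hypothetical values; the real one comes from ytplayer_config['assets']['js'].
    js_old = '//s.ytimg.com/yts/jsbin/player-en_US-vflXXXXXX/base.js'  # protocol-relative
    js_new = '/yts/jsbin/player-en_US-vflXXXXXX/base.js'               # root-relative

    print('https:' + js_old)                   # enough for a protocol-relative URL
    print('https://www.youtube.com' + js_new)  # needed once only a path is served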

View File

@@ -3,73 +3,54 @@
 __all__ = ['zhanqi_download']
 
 from ..common import *
-import re
-import base64
 import json
-import time
-import hashlib
 
 def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
-    html = get_content(url)
-    video_type_patt = r'VideoType":"([^"]+)"'
-    video_type = match1(html, video_type_patt)
-    #rtmp_base_patt = r'VideoUrl":"([^"]+)"'
-    rtmp_id_patt = r'videoId":"([^"]+)"'
-    vod_m3u8_id_patt = r'VideoID":"([^"]+)"'
-    title_patt = r'<p class="title-name" title="[^"]+">([^<]+)</p>'
-    title_patt_backup = r'<title>([^<]{1,9999})</title>'
-    title = match1(html, title_patt) or match1(html, title_patt_backup)
-    title = unescape_html(title)
-    rtmp_base = "http://wshdl.load.cdn.zhanqi.tv/zqlive"
-    vod_base = "http://dlvod.cdn.zhanqi.tv"
-    rtmp_real_base = "rtmp://dlrtmp.cdn.zhanqi.tv/zqlive/"
-    room_info = "http://www.zhanqi.tv/api/static/live.roomid/"
-    KEY_MASK = "#{&..?!("
-    ak2_pattern = r'ak2":"\d-([^|]+)'
-
-    if video_type == "LIVE":
-        rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/')
-        #request_url = rtmp_base+'/'+rtmp_id+'.flv?get_url=1'
-        #real_url = get_html(request_url)
-        html2 = get_content(room_info + rtmp_id.split("_")[0] + ".json")
-        json_data = json.loads(html2)
-        cdns = json_data["data"]["flashvars"]["cdns"]
-        cdns = base64.b64decode(cdns).decode("utf-8")
-        cdn = match1(cdns, ak2_pattern)
-        cdn = base64.b64decode(cdn).decode("utf-8")
-        key = ''
-        i = 0
-        while(i < len(cdn)):
-            key = key + chr(ord(cdn[i]) ^ ord(KEY_MASK[i % 8]))
-            i = i + 1
-        time_hex = hex(int(time.time()))[2:]
-        key = hashlib.md5(bytes(key + "/zqlive/" + rtmp_id + time_hex, "utf-8")).hexdigest()
-        real_url = rtmp_real_base + '/' + rtmp_id + "?k=" + key + "&t=" + time_hex
+    host_name = url.split('/')[2]
+    first_folder_path = url.split('/')[3].split('?')[0]
+
+    if first_folder_path != 'videos': #url = "https://www.zhanqi.tv/huashan?param_s=1_0.2.0"
+        if first_folder_path == 'topic': #https://www.zhanqi.tv/topic/lyingman
+            first_folder_path = url.split('/')[4].split('?')[0]
+        api_url = "https://www.zhanqi.tv/api/static/v2.1/room/domain/" + first_folder_path + ".json"
+        api_json = json.loads(get_html(api_url))
+        data = api_json['data']
+        status = data['status']
+        if status != '4':
+            raise ValueError ("The live stream is not online!")
+
+        nickname = data['nickname']
+        title = nickname + ": " + data['title']
+        roomid = data['id']
+        videoId = data['videoId']
+        jump_url = "http://wshdl.load.cdn.zhanqi.tv/zqlive/" + videoId + ".flv?get_url=1"
+        jump_url = jump_url.strip('\r\n')
+        real_url = get_html(jump_url)
+        real_url = real_url.strip('\r\n')
+
+        site_info = "www.zhanqi.tv"
         print_info(site_info, title, 'flv', float('inf'))
         if not info_only:
-            download_rtmp_url(real_url, title, 'flv', {}, output_dir, merge = merge)
-            #download_urls([real_url], title, 'flv', None, output_dir, merge = merge)
-    elif video_type == "VOD":
-        vod_m3u8_request = vod_base + match1(html, vod_m3u8_id_patt).replace('\\/','/')
-        vod_m3u8 = get_html(vod_m3u8_request)
-        part_url = re.findall(r'(/[^#]+)\.ts',vod_m3u8)
-        real_url = []
-        for i in part_url:
-            i = vod_base + i + ".ts"
-            real_url.append(i)
-        type_ = ''
-        size = 0
-        for url in real_url:
-            _, type_, temp = url_info(url)
-            size += temp or 0
-        print_info(site_info, title, type_ or 'ts', size)
+            download_url_ffmpeg(real_url, title, 'flv', {}, output_dir = output_dir, merge = merge)
+
+    else: #url = 'https://www.zhanqi.tv/videos/Lyingman/2017/01/182308.html'
+        video_id = url.split('/')[-1].split('?')[0].split('.')[0]
+        assert video_id
+        api_url = "https://www.zhanqi.tv/api/static/v2.1/video/" + video_id + ".json"
+        api_json = json.loads(get_html(api_url))
+        data = api_json['data']
+        title = data['title']
+        video_url_id = data['flashvars']['VideoID']
+        real_url = "http://dlvod.cdn.zhanqi.tv/" + video_url_id
+
+        site_info = "www.zhanqi.tv/videos"
+        print_info(site_info, title, 'flv', float('inf'))
         if not info_only:
-            download_urls(real_url, title, type_ or 'ts', size, output_dir, merge = merge)
-    else:
-        NotImplementedError('Unknown_video_type')
+            download_url_ffmpeg(real_url, title, 'flv', {}, output_dir = output_dir, merge = merge)
 
-site_info = "zhanqi.tv"
 download = zhanqi_download
 download_playlist = playlist_not_supported('zhanqi')

View File

@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.626'
+__version__ = '0.4.652'

View File

@@ -8,9 +8,6 @@ from you_get.common import *
 
 class YouGetTests(unittest.TestCase):
 
-    def test_freesound(self):
-        freesound.download("http://www.freesound.org/people/Corsica_S/sounds/184419/", info_only=True)
-
     def test_imgur(self):
         imgur.download("http://imgur.com/WVLk5nD", info_only=True)
         imgur.download("http://imgur.com/gallery/WVLk5nD", info_only=True)

View File

@@ -24,6 +24,7 @@
         "Programming Language :: Python :: 3.3",
         "Programming Language :: Python :: 3.4",
         "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
         "Topic :: Internet",
         "Topic :: Internet :: WWW/HTTP",
         "Topic :: Multimedia",