Mirror of https://github.com/soimort/you-get.git (synced 2025-02-10 12:12:26 +03:00)
Commit 894e17f108

.travis.yml

@@ -6,11 +6,13 @@ python:
- "3.4"
- "3.5"
- "3.6"
- "3.7-dev"
- "nightly"
- "pypy3"
before_install: pip install flake8
before_install:
- if [[ $TRAVIS_PYTHON_VERSION != '3.2'* && $TRAVIS_PYTHON_VERSION != '3.3'* ]]; then pip install flake8; fi
before_script:
- if [[ $TRAVIS_PYTHON_VERSION != '3.2'* ]]; then flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics; fi
- if [[ $TRAVIS_PYTHON_VERSION != '3.2'* && $TRAVIS_PYTHON_VERSION != '3.3'* ]]; then flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics; fi
script: make test
sudo: false
notifications:

README.md

@@ -113,6 +113,14 @@ You can install `you-get` easily via:
$ brew install you-get
```

### Option 8: pkg (FreeBSD only)

You can install `you-get` easily via:

```
# pkg install you-get
```

### Shell completion

Completion definitions for Bash, Fish and Zsh can be found in [`contrib/completion`](https://github.com/soimort/you-get/tree/develop/contrib/completion). Please consult your shell's manual for how to take advantage of them.

@@ -416,7 +424,9 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 西瓜视频 | <https://www.ixigua.com/> |✓| | |
| 快手 | <https://www.kuaishou.com/> |✓|✓| |
| 抖音 | <https://www.douyin.com/> |✓| | |
| TikTok | <https://www.tiktok.com/> |✓| | |
| 中国体育(TV) | <http://v.zhibo.tv/> </br><http://video.zhibo.tv/> |✓| | |
| 知乎 | <https://www.zhihu.com/> |✓| | |

For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

src/you_get/common.py

@@ -102,6 +102,7 @@ SITES = {
'soundcloud' : 'soundcloud',
'ted' : 'ted',
'theplatform' : 'theplatform',
'tiktok' : 'tiktok',
'tucao' : 'tucao',
'tudou' : 'tudou',
'tumblr' : 'tumblr',

@@ -127,6 +128,7 @@ SITES = {
'youtube' : 'youtube',
'zhanqi' : 'zhanqi',
'zhibo' : 'zhibo',
'zhihu' : 'zhihu',
}

dry_run = False

@@ -429,7 +431,7 @@ def get_content(url, headers={}, decoded=True):
# Decode the response body
if decoded:
charset = match1(
response.getheader('Content-Type'), r'charset=([\w-]+)'
response.getheader('Content-Type', ''), r'charset=([\w-]+)'
)
if charset is not None:
data = data.decode(charset)

@@ -439,7 +441,7 @@ def get_content(url, headers={}, decoded=True):
return data


def post_content(url, headers={}, post_data={}, decoded=True):
def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
"""Post the content of a URL via sending a HTTP POST request.

Args:

@@ -450,13 +452,18 @@ def post_content(url, headers={}, post_data={}, decoded=True):
Returns:
The content as a string.
"""

logging.debug('post_content: %s \n post_data: %s' % (url, post_data))
if kwargs.get('post_data_raw'):
logging.debug('post_content: %s\npost_data_raw: %s' % (url, kwargs['post_data_raw']))
else:
logging.debug('post_content: %s\npost_data: %s' % (url, post_data))

req = request.Request(url, headers=headers)
if cookies:
cookies.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs)
if kwargs.get('post_data_raw'):
post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
else:
post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
response = urlopen_with_retry(req, data=post_data_enc)
data = response.read()
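
Note: the `post_data_raw` keyword added above lets a caller send a pre-serialized body (for example a JSON document) instead of a URL-encoded form dict. A minimal sketch of the two call styles, assuming `post_content` is imported from `you_get.common`; the endpoint URL is a placeholder:

```python
# Sketch only: the two calling conventions of post_content().
from you_get.common import post_content

# Existing style: post_data is a dict and gets URL-encoded.
html = post_content('https://example.com/api',
                    post_data={'q': 'hello', 'page': 1})

# New style: post_data_raw is sent as-is (UTF-8 encoded), so the caller
# controls the exact payload -- e.g. the JSON body the Tumblr extractor
# posts for its GDPR-consent request further down in this commit.
html = post_content('https://example.com/api',
                    headers={'Content-Type': 'application/json'},
                    post_data_raw='{"q": "hello", "page": 1}')
```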

@@ -602,7 +609,12 @@ def url_save(
# the key must be 'Referer' for the hack here
if refer is not None:
tmp_headers['Referer'] = refer
if type(url) is list:
file_size = urls_size(url, faker=faker, headers=tmp_headers)
is_chunked, urls = True, url
else:
file_size = url_size(url, faker=faker, headers=tmp_headers)
is_chunked, urls = False, [url]

continue_renameing = True
while continue_renameing:

@@ -612,7 +624,7 @@ def url_save(
if not is_part:
if bar:
bar.done()
print(
log.w(
'Skipping {}: file already exists'.format(
tr(os.path.basename(filepath))
)

@@ -638,7 +650,10 @@ def url_save(
print('Changing name to %s' % tr(os.path.basename(filepath)), '...')
continue_renameing = True
continue
print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
if log.yes_or_no('File with this name already exists. Overwrite?'):
log.w('Overwriting %s ...' % tr(os.path.basename(filepath)))
else:
return
elif not os.path.exists(os.path.dirname(filepath)):
os.mkdir(os.path.dirname(filepath))

@@ -655,6 +670,8 @@ def url_save(
else:
open_mode = 'wb'

for url in urls:
received_chunk = 0
if received < file_size:
if faker:
tmp_headers = fake_headers

@@ -665,7 +682,7 @@ def url_save(
else:
headers = {}
'''
if received:
if received and not is_chunked: # only request a range when not chunked
tmp_headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
tmp_headers['Referer'] = refer

@@ -693,7 +710,9 @@ def url_save(
range_length = int(content_length) if content_length is not None \
else float('inf')

if file_size != received + range_length:
if is_chunked: # always append if chunked
open_mode = 'ab'
elif file_size != received + range_length: # is it ever necessary?
received = 0
if bar:
bar.received = 0

@@ -707,9 +726,12 @@ def url_save(
except socket.timeout:
pass
if not buffer:
if received == file_size: # Download finished
if is_chunked and received_chunk == range_length:
break
elif not is_chunked and received == file_size: # Download finished
break
# Unexpected termination. Retry request
if not is_chunked: # when
tmp_headers['Range'] = 'bytes=' + str(received) + '-'
response = urlopen_with_retry(
request.Request(url, headers=tmp_headers)

@@ -717,6 +739,7 @@ def url_save(
continue
output.write(buffer)
received += len(buffer)
received_chunk += len(buffer)
if bar:
bar.update_received(len(buffer))
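
Note: `url_save` now accepts either a single URL or a list of chunk URLs. For a list, the total size comes from `urls_size`, the `Range` header is skipped, and every chunk is appended to the same output file. The following is a minimal, self-contained sketch of that append-the-chunks idea; it is illustrative only, not you-get's actual implementation:

```python
# Each URL in the list is fetched in order and appended to one output file.
from urllib import request

def save_chunks(urls, filepath):
    received = 0
    with open(filepath, 'wb') as output:
        for url in urls:
            with request.urlopen(request.Request(url)) as response:
                while True:
                    buffer = response.read(256 * 1024)
                    if not buffer:          # this chunk is finished, move on
                        break
                    output.write(buffer)
                    received += len(buffer)
    return received
```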

@@ -907,7 +930,7 @@ def download_urls(
if total_size:
if not force and os.path.exists(output_filepath) and not auto_rename\
and os.path.getsize(output_filepath) >= total_size * 0.9:
print('Skipping %s: file already exists' % output_filepath)
log.w('Skipping %s: file already exists' % output_filepath)
print()
return
bar = SimpleProgressBar(total_size, len(urls))

@@ -1554,9 +1577,9 @@ def google_search(url):
url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
page = get_content(url, headers=fake_headers)
videos = re.findall(
r'<a href="(https?://[^"]+)" onmousedown="[^"]+">([^<]+)<', page
r'<a href="(https?://[^"]+)" onmousedown="[^"]+"><h3 class="[^"]*">([^<]+)<', page
)
vdurs = re.findall(r'<span class="vdur _dwc">([^<]+)<', page)
vdurs = re.findall(r'<span class="vdur[^"]*">([^<]+)<', page)
durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
print('Google Videos search:')
for v in zip(videos, durs):

src/you_get/extractor.py

@@ -211,7 +211,7 @@ class VideoExtractor():
ext = self.dash_streams[stream_id]['container']
total_size = self.dash_streams[stream_id]['size']

if ext == 'm3u8':
if ext == 'm3u8' or ext == 'm4a':
ext = 'mp4'

if not urls:

src/you_get/extractors/__init__.py

@@ -67,6 +67,7 @@ from .sohu import *
from .soundcloud import *
from .suntv import *
from .theplatform import *
from .tiktok import *
from .tucao import *
from .tudou import *
from .tumblr import *

@@ -89,3 +90,4 @@ from .khan import *
from .zhanqi import *
from .kuaishou import *
from .zhibo import *
from .zhihu import *

src/you_get/extractors/acfun.py

@@ -85,9 +85,13 @@ def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=Fals
_, _, seg_size = url_info(url)
size += seg_size
#fallback to flvhd is not quite possible
print_info(site_info, title, 'mp4', size)
if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]):
ext = 'flv'
else:
ext = 'mp4'
print_info(site_info, title, ext, size)
if not info_only:
download_urls(preferred[0], title, 'mp4', size, output_dir=output_dir, merge=merge)
download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge)
else:
raise NotImplementedError(sourceType)

@@ -105,27 +109,42 @@ def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=Fals
pass

def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url)
html = get_content(url)
assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)

if re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
html = get_content(url)
title = r1(r'data-title="([^"]+)"', html)
title = unescape_html(title)
title = escape_file_path(title)
assert title
if match1(url, r'_(\d+)$'): # current P
title = title + " " + r1(r'active">([^<]*)', html)

vid = r1('data-vid="(\d+)"', html)
up = r1('data-name="([^"]+)"', html)
# bangumi
elif re.match("http://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
html = get_content(url)
title = match1(html, r'"newTitle"\s*:\s*"([^"]+)"')
if match1(url, r'_(\d+)$'): # current P
title = title + " " + r1(r'active">([^<]*)', html)
vid = match1(html, r'videoId="(\d+)"')
up = "acfun"
else:
raise NotImplemented

assert title and vid
title = unescape_html(title)
title = escape_file_path(title)
p_title = r1('active">([^<]+)', html)
title = '%s (%s)' % (title, up)
if p_title: title = '%s - %s' % (title, p_title)
if p_title:
title = '%s - %s' % (title, p_title)

acfun_download_by_vid(vid, title,
output_dir=output_dir,
merge=merge,
info_only=info_only,
**kwargs)

site_info = "AcFun.tv"
download = acfun_download
download_playlist = playlist_not_supported('acfun')

src/you_get/extractors/baidu.py

@@ -129,8 +129,9 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
html = get_html(url)
title = r1(r'title:"([^"]+)"', html)

vhsrc = re.findall(r'"BDE_Image"[^>]+src="([^"]+)"', html) or re.findall(r'vhsrc="([^"]+)"', html)
if vhsrc is not None:
vhsrc = re.findall(r'"BDE_Image"[^>]+src="([^"]+\.mp4)"', html) or \
re.findall(r'vhsrc="([^"]+)"', html)
if len(vhsrc) > 0:
ext = 'mp4'
size = url_size(vhsrc[0])
print_info(site_info, title, ext, size)

src/you_get/extractors/bilibili.py

@@ -22,7 +22,7 @@ from .youku import youku_download_by_vid

class Bilibili(VideoExtractor):
name = 'Bilibili'
live_api = 'http://live.bilibili.com/api/playurl?cid={}&otype=json'
live_api = 'https://api.live.bilibili.com/room/v1/Room/playUrl?cid={}&quality=0&platform=web'
api_url = 'http://interface.bilibili.com/v2/playurl?'
bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}'

@@ -115,7 +115,7 @@ class Bilibili(VideoExtractor):
self.url = 'http://www.bilibili.com/video/av{}/'.format(aid)

self.ua = fake_headers['User-Agent']
self.url = url_locations([self.url])[0]
self.url = url_locations([self.url], faker=True)[0]
frag = urllib.parse.urlparse(self.url).fragment
# http://www.bilibili.com/video/av3141144/index_2.html#page=3
if frag:

@@ -125,26 +125,27 @@ class Bilibili(VideoExtractor):
aid = re.search(r'av(\d+)', self.url).group(1)
self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, page)
self.referer = self.url
self.page = get_content(self.url)
self.page = get_content(self.url, headers=fake_headers)

m = re.search(r'<h1.*?>(.*?)</h1>', self.page) or re.search(r'<h1 title="([^"]+)">', self.page)
if m is not None:
self.title = m.group(1)
s = re.search(r'<span>([^<]+)</span>', m.group(1))
s = re.search(r'<span.*?>([^<]+)</span>', m.group(1))
if s:
self.title = unescape_html(s.group(1))
if self.title is None:
m = re.search(r'property="og:title" content="([^"]+)"', self.page)
if m is not None:
self.title = m.group(1)

if 'subtitle' in kwargs:
subtitle = kwargs['subtitle']
self.title = '{} {}'.format(self.title, subtitle)
else:
playinfo = re.search(r'__INITIAL_STATE__=(.*?);\(function\(\)', self.page)
if playinfo is not None:
pages = json.loads(playinfo.group(1))['videoData']['pages']
jsonPlayinfo = json.loads(playinfo.group(1))
if 'videoData' in jsonPlayinfo:
pages = jsonPlayinfo['videoData']['pages']
if len(pages) > 1:
qs = dict(parse.parse_qsl(urllib.parse.urlparse(self.url).query))
page = pages[int(qs.get('p', 1)) - 1]

@@ -160,6 +161,8 @@ class Bilibili(VideoExtractor):
self.live_entry(**kwargs)
elif 'vc.bilibili.com' in self.url:
self.vc_entry(**kwargs)
elif 'audio/au' in self.url:
self.audio_entry(**kwargs)
else:
self.entry(**kwargs)

@@ -171,6 +174,30 @@ class Bilibili(VideoExtractor):
self.title = page_list[0]['pagename']
self.download_by_vid(page_list[0]['cid'], True, bangumi_movie=True, **kwargs)

def audio_entry(self, **kwargs):
assert re.match(r'https?://www.bilibili.com/audio/au\d+', self.url)
patt = r"(\d+)"
audio_id = re.search(patt, self.url).group(1)
audio_info_url = \
'https://www.bilibili.com/audio/music-service-c/web/song/info?sid={}'.format(audio_id)
audio_info_response = json.loads(get_content(audio_info_url))
if audio_info_response['msg'] != 'success':
log.wtf('fetch audio information failed!')
sys.exit(2)
self.title = audio_info_response['data']['title']
# TODO:there is no quality option for now
audio_download_url = \
'https://www.bilibili.com/audio/music-service-c/web/url?sid={}&privilege=2&quality=2'.format(audio_id)
audio_download_response = json.loads(get_content(audio_download_url))
if audio_download_response['msg'] != 'success':
log.wtf('fetch audio resource failed!')
sys.exit(2)
self.streams['mp4'] = {}
self.streams['mp4']['src'] = [audio_download_response['data']['cdns'][0]]
self.streams['mp4']['container'] = 'm4a'
self.streams['mp4']['size'] = audio_download_response['data']['size']
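
Note: the new `audio_entry` above resolves a bilibili audio page in two requests -- the song-info endpoint for the title, then the url endpoint for a CDN address and size. A stripped-down, standalone sketch of that flow using the same endpoints (error handling omitted; illustrative only):

```python
import json
from urllib.request import urlopen

def fetch_bilibili_audio(audio_id):
    # Step 1: song metadata (title); step 2: playable CDN URL and size.
    info_url = ('https://www.bilibili.com/audio/music-service-c/web/song/info?sid={}'
                .format(audio_id))
    play_url = ('https://www.bilibili.com/audio/music-service-c/web/url'
                '?sid={}&privilege=2&quality=2'.format(audio_id))
    info = json.loads(urlopen(info_url).read().decode('utf-8'))
    play = json.loads(urlopen(play_url).read().decode('utf-8'))
    return {
        'title': info['data']['title'],
        'src': play['data']['cdns'][0],   # first CDN mirror, as audio_entry uses
        'size': play['data']['size'],
    }
```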

def entry(self, **kwargs):
# tencent player
tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page)

@@ -190,7 +217,12 @@ class Bilibili(VideoExtractor):
index_id = int(re.search(r'index_(\d+)', self.url).group(1))
cid = page_list[index_id-1]['cid'] # change cid match rule
except:
cid = re.search(r'"cid":(\d+)', self.page).group(1)
page = re.search(r'p=(\d+)', self.url)
if page is None:
p = 1
else:
p = int(page.group(1))
cid = re.search(r'"cid":(\d+),"page":%s' % p, self.page).group(1)
if cid is not None:
self.download_by_vid(cid, re.search('bangumi', self.url) is not None, **kwargs)
else:

@@ -226,7 +258,7 @@ class Bilibili(VideoExtractor):

api_url = self.live_api.format(self.room_id)
json_data = json.loads(get_content(api_url))
urls = [json_data['durl'][0]['url']]
urls = [json_data['data']['durl'][0]['url']]

self.streams['live'] = {}
self.streams['live']['src'] = urls

@@ -252,28 +284,9 @@ class Bilibili(VideoExtractor):
self.streams['vc']['size'] = int(item['video_size'])

def bangumi_entry(self, **kwargs):
bangumi_id = re.search(r'(\d+)', self.url).group(1)
frag = urllib.parse.urlparse(self.url).fragment
if frag:
episode_id = frag
else:
episode_id = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page) or re.search(r'\/ep(\d+)', self.url).group(1)
# cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data=dict(episode_id=episode_id))
# cid = json.loads(cont)['result']['cid']
cont = get_content('http://bangumi.bilibili.com/web_api/episode/{}.json'.format(episode_id))
ep_info = json.loads(cont)['result']['currentEpisode']

bangumi_data = get_bangumi_info(str(ep_info['seasonId']))
bangumi_payment = bangumi_data.get('payment')
if bangumi_payment and bangumi_payment['price'] != '0':
log.w("It's a paid item")
# ep_ids = collect_bangumi_epids(bangumi_data)

index_title = ep_info['indexTitle']
long_title = ep_info['longTitle'].strip()
cid = ep_info['danmaku']

self.title = '{} [{} {}]'.format(self.title, index_title, long_title)
data = json.loads(re.search(r'__INITIAL_STATE__=(.+);\(function', self.page).group(1))
cid = data['epInfo']['cid']
# index_title = data['epInfo']['index_title']
self.download_by_vid(cid, bangumi=True, **kwargs)

@@ -376,10 +389,82 @@ def download_video_from_favlist(url, **kwargs):

else:
log.wtf("Fail to parse the fav title" + url, "")
def download_music_from_favlist(url, page, **kwargs):
m = re.search(r'https?://www.bilibili.com/audio/mycollection/(\d+)', url)
if m is not None:
sid = m.group(1)
json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-coll?"
"sid={}&pn={}&ps=100".format(sid, page)))
if json_result['msg'] == 'success':
music_list = json_result['data']['data']
music_count = len(music_list)
for i in range(music_count):
audio_id = music_list[i]['id']
audio_title = music_list[i]['title']
audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id)
print("Start downloading music ", audio_title)
Bilibili().download_by_url(audio_url, **kwargs)
if page < json_result['data']['pageCount']:
page += 1
download_music_from_favlist(url, page, **kwargs)
else:
log.wtf("Fail to get music list of page " + json_result)
sys.exit(2)
else:
log.wtf("Fail to parse the sid from " + url, "")

def download_video_from_totallist(url, page, **kwargs):
# the url has format: https://space.bilibili.com/64169458/#/video
m = re.search(r'space\.bilibili\.com/(\d+)/.*?video', url)
mid = ""
if m is not None:
mid = m.group(1)
jsonresult = json.loads(get_content("https://space.bilibili.com/ajax/member/getSubmitVideos?mid={}&pagesize=100&tid=0&page={}&keyword=&order=pubdate&jsonp=jsonp".format(mid, page)))
if jsonresult['status']:
videos = jsonresult['data']['vlist']
videocount = len(videos)
for i in range(videocount):
videoid = videos[i]["aid"]
videotitle = videos[i]["title"]
videourl = "https://www.bilibili.com/video/av{}".format(videoid)
print("Start downloading ", videotitle, " video ", videotitle)
Bilibili().download_by_url(videourl, subtitle=videotitle, **kwargs)
if page < jsonresult['data']['pages']:
page += 1
download_video_from_totallist(url, page, **kwargs)
else:
log.wtf("Fail to get the files of page " + jsonresult)
sys.exit(2)

else:
log.wtf("Fail to parse the video title" + url, "")

def download_music_from_totallist(url, page, **kwargs):
m = re.search(r'https?://www.bilibili.com/audio/am(\d+)\?type=\d', url)
if m is not None:
sid = m.group(1)
json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-menu?"
"sid={}&pn={}&ps=100".format(sid, page)))
if json_result['msg'] == 'success':
music_list = json_result['data']['data']
music_count = len(music_list)
for i in range(music_count):
audio_id = music_list[i]['id']
audio_title = music_list[i]['title']
audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id)
print("Start downloading music ",audio_title)
Bilibili().download_by_url(audio_url, **kwargs)
if page < json_result['data']['pageCount']:
page += 1
download_music_from_totallist(url, page, **kwargs)
else:
log.wtf("Fail to get music list of page " + json_result)
sys.exit(2)
else:
log.wtf("Fail to parse the sid from " + url, "")

def bilibili_download_playlist_by_url(url, **kwargs):
url = url_locations([url])[0]
url = url_locations([url], faker=True)[0]
kwargs['playlist'] = True
# a bangumi here? possible?
if 'live.bilibili' in url:

@@ -396,6 +481,12 @@ def bilibili_download_playlist_by_url(url, **kwargs):
elif 'favlist' in url:
# this a fav list folder
download_video_from_favlist(url, **kwargs)
elif re.match(r'https?://space.bilibili.com/\d+/#/video', url):
download_video_from_totallist(url, 1, **kwargs)
elif re.match(r'https://www.bilibili.com/audio/mycollection/\d+', url):
download_music_from_favlist(url, 1, **kwargs)
elif re.match(r'https?://www.bilibili.com/audio/am\d+\?type=\d', url):
download_music_from_totallist(url, 1, **kwargs)
else:
aid = re.search(r'av(\d+)', url).group(1)
page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid)))

src/you_get/extractors/instagram.py

@@ -29,9 +29,9 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
image_url = edge['node']['display_url']
if 'video_url' in edge['node']:
image_url = edge['node']['video_url']
image_url = image_url.split('?')[0]
ext = image_url.split('.')[-1]
ext = image_url.split('?')[0].split('.')[-1]
size = int(get_head(image_url)['Content-Length'])

print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[image_url],

@@ -44,9 +44,9 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url']
if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']:
image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url']
image_url = image_url.split('?')[0]
ext = image_url.split('.')[-1]
ext = image_url.split('?')[0].split('.')[-1]
size = int(get_head(image_url)['Content-Length'])

print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[image_url],

src/you_get/extractors/iwara.py

@@ -17,20 +17,20 @@ headers = {

def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
global headers
video_hash=match1(url, r'http://\w+.iwara.tv/videos/(\w+)')
video_url=match1(url, r'(http://\w+.iwara.tv)/videos/\w+')
html = get_content(url,headers=headers)
video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)')
video_url = match1(url, r'(https?://\w+.iwara.tv)/videos/\w+')
html = get_content(url, headers=headers)
title = r1(r'<title>(.*)</title>', html)
api_url=video_url+'/api/video/'+video_hash
content=get_content(api_url,headers=headers)
data=json.loads(content)
type,ext,size=url_info(data[0]['uri'], headers=headers)
down_urls=data[0]['uri']
print_info(down_urls,title+data[0]['resolution'],type,size)
api_url = video_url + '/api/video/' + video_hash
content = get_content(api_url, headers=headers)
data = json.loads(content)
down_urls = 'https:' + data[0]['uri']
type, ext, size = url_info(down_urls, headers=headers)
print_info(site_info, title+data[0]['resolution'], type, size)

if not info_only:
download_urls([down_urls], title, ext, size, output_dir, merge = merge,headers=headers)
download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)

site_info = "iwara"
site_info = "Iwara"
download = iwara_download
download_playlist = playlist_not_supported('iwara')

src/you_get/extractors/ixigua.py

@@ -1,14 +1,132 @@
#!/usr/bin/env python
__all__ = ['ixigua_download']
import base64

from .toutiao import download as toutiao_download
from .toutiao import download_playlist as toutiao_download_playlist
import binascii

from ..common import *
import random
import ctypes
from json import loads

__all__ = ['ixigua_download', 'ixigua_download_playlist_by_url']

headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 "
"Safari/537.36",
}


def int_overflow(val):
maxint = 2147483647
if not -maxint - 1 <= val <= maxint:
val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
return val


def unsigned_right_shitf(n, i):
if n < 0:
n = ctypes.c_uint32(n).value
if i < 0:
return -int_overflow(n << abs(i))
return int_overflow(n >> i)


def get_video_url_from_video_id(video_id):
"""Splicing URLs according to video ID to get video details"""
# from js
data = [""] * 256
for index, _ in enumerate(data):
t = index
for i in range(8):
t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1)
data[index] = t

def tmp():
rand_num = random.random()
path = "/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}".format(video_id=video_id,
random_num=str(rand_num)[2:])
e = o = r = -1
i, a = 0, len(path)
while i < a:
e = ord(path[i])
i += 1
if e < 128:
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)]
else:
if e < 2048:
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))]
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
else:
if 55296 <= e < 57344:
e = (1023 & e) + 64
i += 1
o = 1023 & t.url(i)
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))]
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))]
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))]
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))]
else:
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))]
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))]
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]

return "https://ib.365yg.com{path}&s={param}".format(path=path, param=unsigned_right_shitf(r ^ -1, 0))

while 1:
url = tmp()
if url.split("=")[-1][0] != "-": # 参数s不能为负数
return url
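
Note: `int_overflow` and `unsigned_right_shitf` (the misspelling is the actual identifier) emulate JavaScript's 32-bit signed arithmetic and its `>>>` operator, so that `get_video_url_from_video_id` can reproduce the page's own checksum for the `s` query parameter. Because the final shift-by-zero is folded back into signed range (unlike a real `>>>`), the signature can come out negative, which appears to be why the loop above retries until it is not (the Chinese comment, 参数s不能为负数, means "the s parameter must not be negative"). A small standalone check of the emulation, repeating the two helpers so the snippet runs on its own:

```python
import ctypes

def int_overflow(val):
    # fold into the signed 32-bit range, like JavaScript bitwise results
    maxint = 2147483647
    if not -maxint - 1 <= val <= maxint:
        val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
    return val

def unsigned_right_shitf(n, i):
    # emulate JS "n >>> i" by viewing n as an unsigned 32-bit value first
    if n < 0:
        n = ctypes.c_uint32(n).value
    if i < 0:
        return -int_overflow(n << abs(i))
    return int_overflow(n >> i)

print(unsigned_right_shitf(-306674912, 1))  # 1994146192, same as JS -306674912 >>> 1
print(unsigned_right_shitf(-1, 0))          # -1 here, whereas JS -1 >>> 0 is 4294967295
```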

def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
return toutiao_download(url.replace('ixigua', '365yg'))
# example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
html = get_html(url, faker=True)
video_id = match1(html, r"videoId\s*:\s*'([^']+)'")
title = match1(html, r"title: '(\S+)',")
if not video_id:
log.e("video_id not found, url:{}".format(url))
return
video_info_url = get_video_url_from_video_id(video_id)
video_info = loads(get_content(video_info_url))
if video_info.get("code", 1) != 0:
log.e("Get video info from {} error: server return code {}".format(video_info_url, video_info.get("code", 1)))
return
if not video_info.get("data", None):
log.e("Get video info from {} error: The server returns JSON value"
" without data or data is empty".format(video_info_url))
return
if not video_info["data"].get("video_list", None):
log.e("Get video info from {} error: The server returns JSON value"
" without data.video_list or data.video_list is empty".format(video_info_url))
return
if not video_info["data"]["video_list"].get("video_1", None):
log.e("Get video info from {} error: The server returns JSON value"
" without data.video_list.video_1 or data.video_list.video_1 is empty".format(video_info_url))
return
size = int(video_info["data"]["video_list"]["video_1"]["size"])
print_info(site_info=site_info, title=title, type="mp4", size=size) # 该网站只有mp4类型文件
if not info_only:
video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"].encode("utf-8"))
download_urls([video_url.decode("utf-8")], title, "mp4", size, output_dir, merge=merge, headers=headers, **kwargs)


def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):
assert "user" in url, "Only support users to publish video list,Please provide a similar url:" \
"https://www.ixigua.com/c/user/6907091136/"

user_id = url.split("/")[-2] if url[-1] == "/" else url.split("/")[-1]
params = {"max_behot_time": "0", "max_repin_time": "0", "count": "20", "page_type": "0", "user_id": user_id}
while 1:
url = "https://www.ixigua.com/c/user/article/?" + "&".join(["{}={}".format(k, v) for k, v in params.items()])
video_list = loads(get_content(url, headers=headers))
params["max_behot_time"] = video_list["next"]["max_behot_time"]
for video in video_list["data"]:
ixigua_download("https://www.ixigua.com/i{}/".format(video["item_id"]), output_dir, merge, info_only,
**kwargs)
if video_list["next"]["max_behot_time"] == 0:
break


site_info = "ixigua.com"
download = ixigua_download
download_playlist = toutiao_download_playlist
download_playlist = ixigua_download_playlist_by_url

src/you_get/extractors/lizhi.py

@@ -2,8 +2,17 @@

__all__ = ['lizhi_download']
import json
import datetime
from ..common import *

#
# Worked well but not perfect.
# TODO: add option --format={sd|hd}
#
def get_url(ep):
readable = datetime.datetime.fromtimestamp(int(ep['create_time']) / 1000).strftime('%Y/%m/%d')
return 'http://cdn5.lizhi.fm/audio/{}/{}_hd.mp3'.format(readable, ep['id'])

# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/
#
# Returns a list of tuples (audio_id, title, url) for each episode

@@ -23,7 +32,7 @@ def lizhi_extract_playlist_info(radio_id):
# (au_cnt), then handle pagination properly.
api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id
api_response = json.loads(get_content(api_url))
return [(ep['id'], ep['name'], ep['url']) for ep in api_response]
return [(ep['id'], ep['name'], get_url(ep)) for ep in api_response]

def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False):
filetype, ext, size = url_info(url)
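
Note: the new `get_url` helper above rebuilds each episode's CDN URL from its `create_time` (a millisecond timestamp) and `id` instead of trusting the API's `url` field. A small illustration; the episode record below is hypothetical and the printed date depends on the local timezone:

```python
import datetime

def get_url(ep):
    # create_time is in milliseconds; the CDN path embeds the creation date
    readable = datetime.datetime.fromtimestamp(int(ep['create_time']) / 1000).strftime('%Y/%m/%d')
    return 'http://cdn5.lizhi.fm/audio/{}/{}_hd.mp3'.format(readable, ep['id'])

ep = {'create_time': '1540000000000', 'id': 2758952290348550150}  # made-up values
print(get_url(ep))  # e.g. http://cdn5.lizhi.fm/audio/2018/10/20/2758952290348550150_hd.mp3
```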

src/you_get/extractors/miaopai.py

@@ -2,9 +2,12 @@

__all__ = ['miaopai_download']

import string
import random
from ..common import *
import urllib.error
import urllib.parse
from ..util import fs

fake_headers_mobile = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',

@@ -20,6 +23,10 @@ def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = Fa

mobile_page = get_content(page_url, headers=fake_headers_mobile)
url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W')
if url is None:
wb_mp = re.search(r'<script src=([\'"])(.+?wb_mp\.js)\1>', mobile_page).group(2)
return miaopai_download_by_wbmp(wb_mp, fid, output_dir=output_dir, merge=merge,
info_only=info_only, total_size=None, **kwargs)
title = match1(mobile_page, r'<title>((.|\n)+?)</title>')
if not title:
title = fid

@@ -29,14 +36,62 @@ def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = Fa
if not info_only:
download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)

#----------------------------------------------------------------------

def miaopai_download_by_wbmp(wbmp_url, fid, info_only=False, **kwargs):
headers = {}
headers.update(fake_headers_mobile)
headers['Host'] = 'imgaliyuncdn.miaopai.com'
wbmp = get_content(wbmp_url, headers=headers)
appid = re.search(r'appid:\s*?([^,]+?),', wbmp).group(1)
jsonp = re.search(r'jsonp:\s*?([\'"])(\w+?)\1', wbmp).group(2)
population = [i for i in string.ascii_lowercase] + [i for i in string.digits]
info_url = '{}?{}'.format('http://p.weibo.com/aj_media/info', parse.urlencode({
'appid': appid.strip(),
'fid': fid,
jsonp.strip(): '_jsonp' + ''.join(random.sample(population, 11))
}))
headers['Host'] = 'p.weibo.com'
jsonp_text = get_content(info_url, headers=headers)
jsonp_dict = json.loads(match1(jsonp_text, r'\(({.+})\)'))
if jsonp_dict['code'] != 200:
log.wtf('[Failed] "%s"' % jsonp_dict['msg'])
video_url = jsonp_dict['data']['meta_data'][0]['play_urls']['l']
title = jsonp_dict['data']['description']
title = title.replace('\n', '_')
ext = 'mp4'
headers['Host'] = 'f.us.sinaimg.cn'
print_info(site_info, title, ext, url_info(video_url, headers=headers)[2])
if not info_only:
download_urls([video_url], fs.legitimize(title), ext, headers=headers, **kwargs)


def miaopai_download_direct(url, info_only, **kwargs):
mobile_page = get_content(url, headers=fake_headers_mobile)
try:
title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
except:
title = re.search(r'([\'"])status_title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
title = title.replace('\n', '_')
stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
ext = 'mp4'
print_info(site_info, title, ext, url_info(stream_url, headers=fake_headers_mobile)[2])
if not info_only:
download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs)


# ----------------------------------------------------------------------
def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
if match1(url, r'weibo\.com/tv/v/(\w+)'):
return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)

fid = match1(url, r'\?fid=(\d{4}:\w+)')
if fid is not None:
miaopai_download_by_fid(fid, output_dir, merge, info_only)
elif '/p/230444' in url:
fid = match1(url, r'/p/230444(\w+)')
miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only)
elif re.match(r'^http[s]://weibo\.com/\d+/.+', url):
miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
else:
mobile_page = get_content(url, headers = fake_headers_mobile)
hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page)

@@ -46,6 +101,7 @@ def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **
escaped_url = hit.group(1)
miaopai_download(urllib.parse.unquote(escaped_url), output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)

site_info = "miaopai"
download = miaopai_download
download_playlist = playlist_not_supported('miaopai')

src/you_get/extractors/naver.py

@@ -7,17 +7,24 @@ import re

from ..util import log
from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size
from .universal import *

__all__ = ['naver_download_by_url']


def naver_download_by_url(url, info_only=False, **kwargs):
def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):
ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
page = get_content(url)
og_video_url = re.search(r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page).group(1)
try:
temp = re.search(r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page)
if temp is not None:
og_video_url = temp.group(1)
params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query)
vid = params_dict['vid'][0]
key = params_dict['outKey'][0]
else:
vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
meta_str = get_content(ep.format(vid, key))
meta_json = json.loads(meta_str)
if 'errorCode' in meta_json:

@@ -32,6 +39,8 @@ def naver_download_by_url(url, info_only=False, **kwargs):
print_info(site_info, title, 'mp4', size)
if not info_only:
download_urls([video_url], title, 'mp4', size, **kwargs)
except:
universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)

site_info = "naver.com"
download = naver_download_by_url

src/you_get/extractors/pptv.py

@@ -192,14 +192,14 @@ class PPTV(VideoExtractor):
if self.url and not self.vid:
if not re.match(r'http://v.pptv.com/show/(\w+)\.html', self.url):
raise('Unknown url pattern')
page_content = get_content(self.url)
page_content = get_content(self.url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"})
self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')

if not self.vid:
raise('Cannot find id')
api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
api_url += '?appplt=flp&appid=pptv.flashplayer.vod&appver=3.4.2.28&type=&version=4'
dom = parseString(get_content(api_url))
dom = parseString(get_content(api_url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}))
self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom)
xml_streams = merge_meta(m_items, m_streams, m_segs)
for stream_id in xml_streams:

src/you_get/extractors/sohu.py

@@ -15,9 +15,9 @@ Changelog:
new api
'''

def real_url(host,vid,tvid,new,clipURL,ck):
url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())+'&rb=1'
return json.loads(get_html(url))['url']
def real_url(fileName, key, ch):
url = "https://data.vod.itc.cn/ip?new=" + fileName + "&num=1&key=" + key + "&ch=" + ch + "&pt=1&pg=2&prod=h5n"
return json.loads(get_html(url))['servers'][0]['url']

def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None, **kwargs):
if re.match(r'http://share.vrs.sohu.com', url):

@@ -51,9 +51,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac
title = data['tvName']
size = sum(data['clipsBytes'])
assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):
clipURL = urlparse(clip).path
urls.append(real_url(host,hqvid,tvid,new,clipURL,ck))
for fileName, key in zip(data['su'], data['ck']):
urls.append(real_url(fileName, key, data['ch']))
# assert data['clipsURL'][0].endswith('.mp4')

else:

@@ -66,9 +65,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac
title = data['tvName']
size = sum(map(int,data['clipsBytes']))
assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):
clipURL = urlparse(clip).path
urls.append(real_url(host,vid,tvid,new,clipURL,ck))
for fileName, key in zip(data['su'], data['ck']):
urls.append(real_url(fileName, key, data['ch']))

print_info(site_info, title, 'mp4', size)
if not info_only:

src/you_get/extractors/tiktok.py (new file)

@@ -0,0 +1,23 @@
#!/usr/bin/env python

__all__ = ['tiktok_download']

from ..common import *

def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
title = r1(r'<title>(.*?)</title>', html)
video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html)
title = '%s [%s]' % (title, video_id)
dataText = r1(r'var data = \[(.*)\] ', html) or r1(r'var data = (\{.*\})', html)
data = json.loads(dataText)
source = 'http:' + data['video']['play_addr']['url_list'][0]
mime, ext, size = url_info(source)

print_info(site_info, title, mime, size)
if not info_only:
download_urls([source], title, ext, size, output_dir, merge=merge)

site_info = "TikTok.com"
download = tiktok_download
download_playlist = playlist_not_supported('tiktok')

src/you_get/extractors/tumblr.py

@@ -13,7 +13,29 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
universal_download(url, output_dir, merge=merge, info_only=info_only)
return

html = parse.unquote(get_html(url)).replace('\/', '/')
import ssl
ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
cookie_handler = request.HTTPCookieProcessor()
opener = request.build_opener(ssl_context, cookie_handler)
request.install_opener(opener)

page = get_html(url)
form_key = match1(page, r'id="tumblr_form_key" content="([^"]+)"')
if form_key is not None:
# bypass GDPR consent page
referer = 'https://www.tumblr.com/privacy/consent?redirect=%s' % parse.quote_plus(url)
post_content('https://www.tumblr.com/svc/privacy/consent',
headers={
'Content-Type': 'application/json',
'User-Agent': fake_headers['User-Agent'],
'Referer': referer,
'X-tumblr-form-key': form_key,
'X-Requested-With': 'XMLHttpRequest'
},
post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url)
page = get_html(url, faker=True)

html = parse.unquote(page).replace('\/', '/')
feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)

if feed in ['photo', 'photoset', 'entry'] or feed is None:

@@ -21,23 +43,24 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
page_title = r1(r'<meta name="description" content="([^"\n]+)', html) or \
r1(r'<meta property="og:description" content="([^"\n]+)', html) or \
r1(r'<title>([^<\n]*)', html)
urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.jpg)', html) +\
re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.png)', html) +\
re.findall(r'(https?://[^;"&]+/tumblr_[^";]+_\d+\.gif)', html)
urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.jpg)', html) +\
re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.png)', html) +\
re.findall(r'(https?://[^;"&]+/tumblr_[^";&]+_\d+\.gif)', html)

tuggles = {}
for url in urls:
filename = parse.unquote(url.split('/')[-1])
hd_url = r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg' # FIXME: decide actual quality
filename = parse.unquote(hd_url.split('/')[-1])
title = '.'.join(filename.split('.')[:-1])
tumblr_id = r1(r'^tumblr_(.+)_\d+$', title)
quality = int(r1(r'^tumblr_.+_(\d+)$', title))
ext = filename.split('.')[-1]
try:
size = int(get_head(url)['Content-Length'])
size = int(get_head(hd_url)['Content-Length'])
if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality:
tuggles[tumblr_id] = {
'title': title,
'url': url,
'url': hd_url,
'quality': quality,
'ext': ext,
'size': size,

@@ -99,11 +122,15 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
r1(r'<meta property="og:description" content="([^"]*)" />', html) or
r1(r'<title>([^<\n]*)', html) or url.split("/")[4]).replace('\n', '')

type, ext, size = url_info(real_url)
# this is better
vcode = r1(r'tumblr_(\w+)', real_url)
real_url = 'https://vt.media.tumblr.com/tumblr_%s.mp4' % vcode

type, ext, size = url_info(real_url, faker=True)

print_info(site_info, title, type, size)
if not info_only:
download_urls([real_url], title, ext, size, output_dir, merge = merge)
download_urls([real_url], title, ext, size, output_dir, merge=merge)

site_info = "Tumblr.com"
download = tumblr_download

src/you_get/extractors/twitter.py

@@ -30,9 +30,9 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
return

html = get_html(url, faker=True)
screen_name = r1(r'data-screen-name="([^"]*)"', html) or \
screen_name = r1(r'twitter\.com/([^/]+)', url) or r1(r'data-screen-name="([^"]*)"', html) or \
r1(r'<meta name="twitter:title" content="([^"]*)"', html)
item_id = r1(r'data-item-id="([^"]*)"', html) or \
item_id = r1(r'twitter\.com/[^/]+/status/(\d+)', url) or r1(r'data-item-id="([^"]*)"', html) or \
r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
page_title = "{} [{}]".format(screen_name, item_id)

src/you_get/extractors/universal.py

@@ -67,9 +67,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg

urls = []
for i in media_exts:
urls += re.findall(r'(https?://[^ ;"\'\\]+' + i + r'[^ ;"\'\\]*)', page)
urls += re.findall(r'(https?://[^ ;&"\'\\]+' + i + r'[^ ;&"\'\\]*)', page)

p_urls = re.findall(r'(https?%3A%2F%2F[^;&]+' + i + r'[^;&]*)', page)
p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page)
urls += [parse.unquote(url) for url in p_urls]

q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\']+' + i + r'[^ ;"\']*)', page)

@@ -106,6 +106,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
title = '%s' % i
i += 1

if r1(r'(https://pinterest.com/pin/)', url):
continue

candies.append({'url': url,
'title': title})

src/you_get/extractors/yixia.py

@@ -7,6 +7,24 @@ from urllib.parse import urlparse
from json import loads
import re

#----------------------------------------------------------------------
def miaopai_download_by_smid(smid, output_dir = '.', merge = True, info_only = False):
""""""
api_endpoint = 'https://n.miaopai.com/api/aj_media/info.json?smid={smid}'.format(smid = smid)

html = get_content(api_endpoint)

api_content = loads(html)

video_url = api_content['data']['meta_data'][0]['play_urls']['l']
title = api_content['data']['description']

type, ext, size = url_info(video_url)

print_info(site_info, title, type, size)
if not info_only:
download_urls([video_url], title, ext, size, output_dir, merge=merge)

#----------------------------------------------------------------------
def yixia_miaopai_download_by_scid(scid, output_dir = '.', merge = True, info_only = False):
""""""

@@ -47,7 +65,11 @@ def yixia_xiaokaxiu_download_by_scid(scid, output_dir = '.', merge = True, info_
def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
"""wrapper"""
hostname = urlparse(url).hostname
if 'miaopai.com' in hostname: #Miaopai
if 'n.miaopai.com' == hostname:
smid = match1(url, r'n\.miaopai\.com/media/([^.]+)')
miaopai_download_by_smid(smid, output_dir, merge, info_only)
return
elif 'miaopai.com' in hostname: #Miaopai
yixia_download_by_scid = yixia_miaopai_download_by_scid
site_info = "Yixia Miaopai"

src/you_get/extractors/youku.py

@@ -78,7 +78,7 @@ class Youku(VideoExtractor):
self.api_error_code = None
self.api_error_msg = None

self.ccode = '0508'
self.ccode = '0590'
# Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js
# grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js
self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'

src/you_get/extractors/youtube.py

@@ -37,6 +37,10 @@ class YouTube(VideoExtractor):
]

def decipher(js, s):
# Examples:
# - https://www.youtube.com/yts/jsbin/player-da_DK-vflWlK-zq/base.js
# - https://www.youtube.com/yts/jsbin/player-vflvABTsY/da_DK/base.js
# - https://www.youtube.com/yts/jsbin/player-vfls4aurX/da_DK/base.js
def tr_js(code):
code = re.sub(r'function', r'def', code)
code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code)

@@ -52,11 +56,13 @@ class YouTube(VideoExtractor):
return code

js = js.replace('\n', ' ')
f1 = match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
f1 = match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \
match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \
match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
f1def = 'function %s%s' % (f1, f1def)
f1def = 'function main_%s%s' % (f1, f1def) # prefix to avoid potential namespace conflict
code = tr_js(f1def)
f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def))
for f2 in f2s:

@@ -73,10 +79,20 @@ class YouTube(VideoExtractor):

f1 = re.sub(r'(as|if|in|is|or)', r'_\1', f1)
f1 = re.sub(r'\$', '_dollar', f1)
code = code + 'sig=%s(s)' % f1
code = code + 'sig=main_%s(s)' % f1 # prefix to avoid potential namespace conflict
exec(code, globals(), locals())
return locals()['sig']

def chunk_by_range(url, size):
urls = []
chunk_size = 10485760
start, end = 0, chunk_size - 1
urls.append('%s&range=%s-%s' % (url, start, end))
while end + 1 < size: # processed size < expected size
start, end = end + 1, end + chunk_size
urls.append('%s&range=%s-%s' % (url, start, end))
return urls

def get_url_from_vid(vid):
return 'https://youtu.be/{}'.format(vid)
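
Note: `chunk_by_range` above slices one googlevideo URL into roughly 10 MiB `range=` requests; together with the chunked branch added to `url_save` in common.py, each slice is then appended to the same output file. A quick illustration of the slicing, repeating the helper as a plain function and using a placeholder URL:

```python
def chunk_by_range(url, size):
    # 10 MiB slices with inclusive byte ranges, mirroring the classmethod above
    urls = []
    chunk_size = 10485760
    start, end = 0, chunk_size - 1
    urls.append('%s&range=%s-%s' % (url, start, end))
    while end + 1 < size:  # processed size < expected size
        start, end = end + 1, end + chunk_size
        urls.append('%s&range=%s-%s' % (url, start, end))
    return urls

for u in chunk_by_range('https://example.com/videoplayback?itag=137', 26214400):
    print(u)
# ...?itag=137&range=0-10485759
# ...?itag=137&range=10485760-20971519
# ...?itag=137&range=20971520-31457279
```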
@ -128,7 +144,10 @@ class YouTube(VideoExtractor):
|
||||
for video in videos:
|
||||
vid = parse_query_param(video, 'v')
|
||||
index = parse_query_param(video, 'index')
|
||||
try:
|
||||
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
|
||||
except:
|
||||
pass
|
||||
|
||||
def prepare(self, **kwargs):
|
||||
assert self.url or self.vid
|
||||
@ -144,7 +163,8 @@ class YouTube(VideoExtractor):
|
||||
|
||||
ytplayer_config = None
|
||||
if 'status' not in video_info:
|
||||
log.wtf('[Failed] Unknown status.')
|
||||
log.wtf('[Failed] Unknown status.', exit_code=None)
|
||||
raise
|
||||
elif video_info['status'] == ['ok']:
|
||||
if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']:
|
||||
self.title = parse.unquote_plus(video_info['title'][0])
|
||||
@ -176,7 +196,8 @@ class YouTube(VideoExtractor):
|
||||
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1))
|
||||
except:
|
||||
msg = re.search('class="message">([^<]+)<', video_page).group(1)
|
||||
log.wtf('[Failed] "%s"' % msg.strip())
|
||||
log.wtf('[Failed] "%s"' % msg.strip(), exit_code=None)
|
||||
raise
|
||||
|
||||
if 'title' in ytplayer_config['args']:
|
||||
# 150 Restricted from playback on certain sites
|
||||
@ -185,18 +206,22 @@ class YouTube(VideoExtractor):
|
||||
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
|
||||
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
|
||||
else:
|
||||
log.wtf('[Error] The uploader has not made this video available in your country.')
|
||||
log.wtf('[Error] The uploader has not made this video available in your country.', exit_code=None)
|
||||
raise
|
||||
#self.title = re.search('<meta name="title" content="([^"]+)"', video_page).group(1)
|
||||
#stream_list = []
|
||||
|
||||
elif video_info['errorcode'] == ['100']:
|
||||
log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
|
||||
log.wtf('[Failed] This video does not exist.', exit_code=None) #int(video_info['errorcode'][0])
|
||||
raise
|
||||
|
||||
else:
|
||||
log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
|
||||
log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=None) #int(video_info['errorcode'][0])
|
||||
raise
|
||||
|
||||
else:
|
||||
log.wtf('[Failed] Invalid status.')
|
||||
log.wtf('[Failed] Invalid status.', exit_code=None)
|
||||
raise
|
||||
|
||||
# YouTube Live
|
||||
if ytplayer_config and (ytplayer_config['args'].get('livestream') == '1' or ytplayer_config['args'].get('live_playback') == '1'):
|
||||
@ -286,13 +311,15 @@ class YouTube(VideoExtractor):
|
||||
if not dash_size:
|
||||
try: dash_size = url_size(dash_url)
|
||||
except: continue
|
||||
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
|
||||
dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
|
||||
self.dash_streams[itag] = {
|
||||
'quality': '%sx%s' % (w, h),
|
||||
'itag': itag,
|
||||
'type': mimeType,
|
||||
'mime': mimeType,
|
||||
'container': 'mp4',
|
||||
'src': [dash_url, dash_mp4_a_url],
|
||||
'src': [dash_urls, dash_mp4_a_urls],
|
||||
'size': int(dash_size) + int(dash_mp4_a_size)
|
||||
}
|
||||
elif mimeType == 'video/webm':
|
||||
@ -306,13 +333,15 @@ class YouTube(VideoExtractor):
if not dash_size:
try: dash_size = url_size(dash_url)
except: continue
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size))
self.dash_streams[itag] = {
'quality': '%sx%s' % (w, h),
'itag': itag,
'type': mimeType,
'mime': mimeType,
'container': 'webm',
'src': [dash_url, dash_webm_a_url],
'src': [dash_urls, dash_webm_a_urls],
'size': int(dash_size) + int(dash_webm_a_size)
}
except:
@ -349,13 +378,15 @@ class YouTube(VideoExtractor):
dash_url += '&signature={}'.format(sig)
dash_size = stream['clen']
itag = stream['itag']
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
self.dash_streams[itag] = {
'quality': stream['size'],
'itag': itag,
'type': mimeType,
'mime': mimeType,
'container': 'mp4',
'src': [dash_url, dash_mp4_a_url],
'src': [dash_urls, dash_mp4_a_urls],
'size': int(dash_size) + int(dash_mp4_a_size)
}
elif stream['type'].startswith('video/webm'):
@ -374,13 +405,15 @@ class YouTube(VideoExtractor):
except UnboundLocalError as e:
audio_url = dash_mp4_a_url
audio_size = int(dash_mp4_a_size)
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
audio_urls = self.__class__.chunk_by_range(audio_url, int(audio_size))
self.dash_streams[itag] = {
'quality': stream['size'],
'itag': itag,
'type': mimeType,
'mime': mimeType,
'container': 'webm',
'src': [dash_url, audio_url],
'src': [dash_urls, audio_urls],
'size': int(dash_size) + int(audio_size)
}
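The recurring change in these four hunks is that each `'src'` entry now holds lists of chunked URLs (`dash_urls`, `dash_mp4_a_urls`, and so on) instead of one monolithic DASH URL, produced by the new `chunk_by_range` helper (presumably the method whose `return urls` tail appears at the top of this section). The sketch below shows the kind of splitting assumed here; the 10 MB chunk size and the `range=start-end` parameter format are assumptions, not a copy of the committed helper.

```python
# Assumed behaviour of chunk_by_range: split one googlevideo DASH URL into
# several URLs that each request roughly chunk_size bytes via 'range='.
def chunk_by_range(url, size, chunk_size=10 * 1024 * 1024):
    urls = []
    start, end = 0, chunk_size - 1
    urls.append('%s&range=%s-%s' % (url, start, end))
    while end + 1 < size:
        start, end = end + 1, end + chunk_size
        urls.append('%s&range=%s-%s' % (url, start, end))
    return urls

# chunk_by_range('https://example.invalid/videoplayback?itag=137', 25000000)
# -> ['...&range=0-10485759', '...&range=10485760-20971519', '...&range=20971520-31457279']
```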
@ -37,11 +37,14 @@ def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwa
if is_live is not "1":
raise ValueError("The live stream is not online! (Errno:%s)" % is_live)

ourStreamName = r1(r"window.ourStreamName=\'([s\S'\s\.]*)\'\;[\s\S]*window.rtmpDefaultSource", html)
rtmpPollUrl = r1(r"window.rtmpPollUrl=\'([s\S'\s\.]*)\'\;[\s\S]*window.hlsDefaultSource", html)

#real_url = 'rtmp://220.194.213.56/live.zhibo.tv/8live/' + ourStreamName
real_url = rtmpPollUrl + ourStreamName
match = re.search(r"""
ourStreamName .*?
'(.*?)' .*?
rtmpHighSource .*?
'(.*?)' .*?
'(.*?)'
""", html, re.S | re.X)
real_url = match.group(3) + match.group(1) + match.group(2)

print_info(site_info, title, 'flv', float('inf'))
if not info_only:
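Instead of two separate `r1()` lookups for `ourStreamName` and `rtmpPollUrl`, the live-stream URL is now assembled from a single verbose (`re.X`), dotall (`re.S`) pattern that captures the stream name, the quoted value following `rtmpHighSource`, and the poll URL in one pass. A self-contained illustration against a made-up page fragment (the real inline JS on v.zhibo.tv may be laid out differently):

```python
import re

# Made-up fragment of the page's inline JS; variable layout is an assumption.
html = """
window.ourStreamName='8live_12345';
window.rtmpHighSource='_high';
window.rtmpPollUrl='rtmp://example.invalid/live.zhibo.tv/8live/';
"""

match = re.search(r"""
ourStreamName .*?
'(.*?)' .*?
rtmpHighSource .*?
'(.*?)' .*?
'(.*?)'
""", html, re.S | re.X)

# group(3): poll URL, group(1): stream name, group(2): value after rtmpHighSource
real_url = match.group(3) + match.group(1) + match.group(2)
print(real_url)  # -> rtmp://example.invalid/live.zhibo.tv/8live/8live_12345_high
```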
src/you_get/extractors/zhihu.py (new file, 79 lines)
@ -0,0 +1,79 @@
#!/usr/bin/env python

__all__ = ['zhihu_download', 'zhihu_download_playlist']

from ..common import *
import json


def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    paths = url.split("/")
    # question or column
    if len(paths) < 3 and len(paths) < 6:
        raise TypeError("URL does not conform to specifications, Support column and question only."
                        "Example URL: https://zhuanlan.zhihu.com/p/51669862 or "
                        "https://www.zhihu.com/question/267782048/answer/490720324")

    if ("question" not in paths or "answer" not in paths) and "zhuanlan.zhihu.com" not in paths:
        raise TypeError("URL does not conform to specifications, Support column and question only."
                        "Example URL: https://zhuanlan.zhihu.com/p/51669862 or "
                        "https://www.zhihu.com/question/267782048/answer/490720324")

    html = get_html(url, faker=True)
    title = match1(html, r'data-react-helmet="true">(.*?)</title>')
    for index, video_id in enumerate(matchall(html, [r'<a class="video-box" href="\S+video/(\d+)"'])):
        try:
            video_info = json.loads(
                get_content(r"https://lens.zhihu.com/api/videos/{}".format(video_id), headers=fake_headers))
        except json.decoder.JSONDecodeError:
            log.w("Video id not found:{}".format(video_id))
            continue

        play_list = video_info["playlist"]
        # first High Definition
        # second Second Standard Definition
        # third ld. What is ld ?
        # finally continue
        data = play_list.get("hd", play_list.get("sd", play_list.get("ld", None)))
        if not data:
            log.w("Video id No play address:{}".format(video_id))
            continue
        print_info(site_info, title, data["format"], data["size"])
        if not info_only:
            ext = "_{}.{}".format(index, data["format"])
            if kwargs.get("zhihu_offset"):
                ext = "_{}".format(kwargs["zhihu_offset"]) + ext
            download_urls([data["play_url"]], title, ext, data["size"],
                          output_dir=output_dir, merge=merge, **kwargs)


def zhihu_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    if "question" not in url or "answer" in url: # question page
        raise TypeError("URL does not conform to specifications, Support question only."
                        " Example URL: https://www.zhihu.com/question/267782048")
    url = url.split("?")[0]
    if url[-1] == "/":
        question_id = url.split("/")[-2]
    else:
        question_id = url.split("/")[-1]
    videos_url = r"https://www.zhihu.com/api/v4/questions/{}/answers".format(question_id)
    try:
        questions = json.loads(get_content(videos_url))
    except json.decoder.JSONDecodeError:
        raise TypeError("Check whether the problem URL exists.Example URL: https://www.zhihu.com/question/267782048")

    count = 0
    while 1:
        for data in questions["data"]:
            kwargs["zhihu_offset"] = count
            zhihu_download("https://www.zhihu.com/question/{}/answer/{}".format(question_id, data["id"]),
                           output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
            count += 1
        if questions["paging"]["is_end"]:
            return
        questions = json.loads(get_content(questions["paging"]["next"], headers=fake_headers))


site_info = "zhihu.com"
download = zhihu_download
download_playlist = zhihu_download_playlist
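A brief sketch of how the new module would be driven directly; normally you-get's URL dispatcher picks it based on the host, and the calls below simply reuse the example URLs from the error messages above, assuming the package is importable as `you_get`.

```python
from you_get.extractors.zhihu import zhihu_download, zhihu_download_playlist

# One column article or one answer:
zhihu_download('https://zhuanlan.zhihu.com/p/51669862', info_only=True)
zhihu_download('https://www.zhihu.com/question/267782048/answer/490720324', info_only=True)

# Every answer under a question, paged through /api/v4/questions/{id}/answers:
zhihu_download_playlist('https://www.zhihu.com/question/267782048', info_only=True)
```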
@ -13,6 +13,7 @@ def legitimize(text, os=detect_os()):
ord('|'): '-',
})

# FIXME: do some filesystem detection
if os == 'windows' or os == 'cygwin' or os == 'wsl':
# Windows (non-POSIX namespace)
text = text.translate({

@ -28,6 +29,7 @@ def legitimize(text, os=detect_os()):
ord('>'): '-',
ord('['): '(',
ord(']'): ')',
ord('\t'): ' ',
})
else:
# *nix
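These two hunks extend `legitimize()`'s Windows character table: the first widens the branch to cover `wsl`, the second adds a tab-to-space entry. As a quick reminder of how such an `ord()`-keyed table behaves with `str.translate` (the table below is a small subset, not the full mapping in the module):

```python
# Subset of the Windows mapping, including the newly added tab entry.
table = {
    ord('<'): '-',
    ord('>'): '-',
    ord('['): '(',
    ord(']'): ')',
    ord('\t'): ' ',
}
print('clip<1>\t[draft]'.translate(table))  # -> clip-1- (draft)
```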
@ -96,3 +96,9 @@ def wtf(message, exit_code=1):
print_log(message, RED, BOLD)
if exit_code is not None:
sys.exit(exit_code)

def yes_or_no(message):
ans = str(input('%s (y/N) ' % message)).lower().strip()
if ans == 'y':
return True
return False
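This is the change that the `log.wtf(..., exit_code=None)` call sites in the YouTube hunks above rely on: with `exit_code=None`, `wtf()` only prints and returns, so the caller can `raise` and let an outer handler (for example the bare `except: pass` in the playlist loop) skip a broken entry instead of terminating the whole run. A simplified, self-contained sketch of that control flow; names and messages are illustrative, not copied from the extractor.

```python
import sys

def wtf(message, exit_code=1):
    print(message, file=sys.stderr)
    if exit_code is not None:
        sys.exit(exit_code)      # old default: hard exit

def download_one(vid):
    if vid == 'broken':
        wtf('[Failed] Unknown status.', exit_code=None)  # log only
        raise ValueError(vid)    # propagate instead of exiting
    print('downloaded', vid)

for vid in ['ok-1', 'broken', 'ok-2']:
    try:
        download_one(vid)
    except Exception:
        pass                     # playlist keeps going past the broken entry
```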
@ -19,9 +19,11 @@ def detect_os():
elif 'linux' in syst:
os = 'linux'
# detect WSL https://github.com/Microsoft/BashOnWindows/issues/423
try:
with open('/proc/version', 'r') as f:
if 'microsoft' in f.read().lower():
os = 'wsl'
except: pass
elif 'windows' in syst:
os = 'windows'
elif 'bsd' in syst:
@ -1,4 +1,4 @@
#!/usr/bin/env python

script_name = 'you-get'
__version__ = '0.4.1128'
__version__ = '0.4.1193'
@ -25,6 +25,7 @@
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Topic :: Internet",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Multimedia",