Merge branch 'develop' into develop

liqiworks 2021-10-15 10:45:19 +08:00 committed by GitHub
commit 0a7ece66e3
16 changed files with 151 additions and 75 deletions

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.5, 3.6, 3.7, 3.8, pypy3]
python-version: [3.5, 3.6, 3.7, 3.8, 3.9, pypy3]
steps:
- uses: actions/checkout@v2

View File

@@ -434,8 +434,17 @@ def get_content(url, headers={}, decoded=True):
req = request.Request(url, headers=headers)
if cookies:
cookies.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs)
# NOTE: Do not use cookies.add_cookie_header(req)
# #HttpOnly_ cookies were not supported properly by CookieJar and MozillaCookieJar until Python 3.10
# See also:
# - https://github.com/python/cpython/pull/17471
# - https://bugs.python.org/issue2190
# Here we add cookies to the request headers manually
cookie_strings = []
for cookie in list(cookies):
cookie_strings.append(cookie.name + '=' + cookie.value)
cookie_headers = {'Cookie': '; '.join(cookie_strings)}
req.headers.update(cookie_headers)
response = urlopen_with_retry(req)
data = response.read()
@@ -478,8 +487,17 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
req = request.Request(url, headers=headers)
if cookies:
cookies.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs)
# NOTE: Do not use cookies.add_cookie_header(req)
# #HttpOnly_ cookies were not supported properly by CookieJar and MozillaCookieJar until Python 3.10
# See also:
# - https://github.com/python/cpython/pull/17471
# - https://bugs.python.org/issue2190
# Here we add cookies to the request headers manually
cookie_strings = []
for cookie in list(cookies):
cookie_strings.append(cookie.name + '=' + cookie.value)
cookie_headers = {'Cookie': '; '.join(cookie_strings)}
req.headers.update(cookie_headers)
if kwargs.get('post_data_raw'):
post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
else:
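
Both hunks apply the same manual-Cookie workaround. As a minimal standalone sketch of the technique, assuming a Netscape-format cookies.txt already loaded into a jar (the file name and URL below are illustrative, not from this commit):

from http.cookiejar import MozillaCookieJar
from urllib import request

jar = MozillaCookieJar('cookies.txt')  # illustrative path
jar.load(ignore_discard=True, ignore_expires=True)

req = request.Request('https://example.com/')  # illustrative URL
# Join name=value pairs by hand instead of calling jar.add_cookie_header(req)
req.add_header('Cookie', '; '.join('%s=%s' % (c.name, c.value) for c in jar))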

View File

@@ -12,6 +12,8 @@ class Bilibili(VideoExtractor):
# Bilibili media encoding options, in descending quality order.
stream_types = [
{'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280,
'container': 'FLV', 'video_resolution': '3840p', 'desc': '真彩 HDR'},
{'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
{'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
@@ -160,6 +162,11 @@ class Bilibili(VideoExtractor):
self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
# redirect: s
elif re.match(r'https?://(www\.)?bilibili\.com/s/(.+)', self.url):
self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)')
html_content = get_content(self.url, headers=self.bilibili_headers())
# sort it out
if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
sort = 'audio'
@@ -179,7 +186,7 @@
self.download_playlist_by_url(self.url, **kwargs)
return
# regular av video
# regular video
if sort == 'video':
initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
initial_state = json.loads(initial_state_text)
@@ -599,13 +606,21 @@ class Bilibili(VideoExtractor):
log.e('[Error] Unsupported URL pattern.')
exit(1)
# regular av video
# regular video
if sort == 'video':
initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
initial_state = json.loads(initial_state_text)
aid = initial_state['videoData']['aid']
pn = initial_state['videoData']['videos']
if pn!= len(initial_state['videoData']['pages']):#interaction video 互动视频
if pn == len(initial_state['videoData']['pages']):
# non-interactive video
for pi in range(1, pn + 1):
purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
self.__class__().download_by_url(purl, **kwargs)
else:
# interactive video
search_node_list = []
download_cid_set = set([initial_state['videoData']['cid']])
params = {
@@ -656,24 +671,6 @@
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
self.download(**kwargs)
else:
playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>') # FIXME
playinfo = json.loads(playinfo_text) if playinfo_text else None
html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>') # FIXME
playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or '1')-1
for pi in range(p,pn):
self.prepare_by_cid(aid,initial_state['videoData']['pages'][pi]['cid'],'%s (P%s. %s)' % (initial_state['videoData']['title'], pi+1, initial_state['videoData']['pages'][pi]['part']),html_content,playinfo,playinfo_,url)
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
self.download(**kwargs)
# purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi+1)
# self.__class__().download_by_url(purl, **kwargs)
elif sort == 'bangumi':
initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME

View File

@@ -2,6 +2,7 @@
import re
import json
from urllib.parse import unquote
from ..common import (
url_size,
@@ -18,17 +19,17 @@ __all__ = ['douyin_download_by_url']
def douyin_download_by_url(url, **kwargs):
page_content = get_content(url, headers=fake_headers)
match_rule = re.compile(r'var data = \[(.*?)\];')
video_info = json.loads(match_rule.findall(page_content)[0])
video_url = video_info['video']['play_addr']['url_list'][0]
# fix: https://www.douyin.com/share/video/6553248251821165832
# if there is no title, use desc
cha_list = video_info['cha_list']
if cha_list:
title = cha_list[0]['cha_name']
else:
title = video_info['desc']
# The video player and video source are rendered client-side; the data is
# contained in a <script id="RENDER_DATA" type="application/json"> tag,
# URL-quoted. Unquote the whole page content, then search it with plain
# regular expressions.
page_content = unquote(page_content)
title = re.findall(r'"desc":"([^"]*)"', page_content)[0].strip()
video_format = 'mp4'
# video URLs appear as JSON in the pattern {"src":"THE_URL"}
urls_pattern = r'"playAddr":(\[.*?\])'
urls = json.loads(re.findall(urls_pattern, page_content)[0])
video_url = 'https:' + urls[0]['src']
size = url_size(video_url, faker=True)
print_info(
site_info='douyin.com', title=title,
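
To see the unquote-then-regex approach in isolation, a hedged sketch on an invented, URL-quoted RENDER_DATA-style payload (not a real Douyin response):

from urllib.parse import unquote
import json
import re

page = unquote('%7B%22desc%22%3A%22hello%22%2C%22playAddr%22%3A%5B%7B%22src%22%3A%22//example.com/v.mp4%22%7D%5D%7D')
title = re.findall(r'"desc":"([^"]*)"', page)[0]                 # 'hello'
urls = json.loads(re.findall(r'"playAddr":(\[.*?\])', page)[0])
video_url = 'https:' + urls[0]['src']                            # 'https://example.com/v.mp4'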

View File

@@ -73,7 +73,7 @@ def get_api_key(page):
match = match1(page, pattern_inline_api_key)
# this happens only when the url points to a gallery page
# that contains no inline api_key(and never makes xhr api calls)
# in fact this might be a better approch for getting a temporary api key
# in fact this might be a better approach for getting a temporary api key
# since there's no place for a user to add custom information that may
# misguide the regex in the homepage
if not match:

View File

@@ -6,14 +6,14 @@ from ..common import *
def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
url = r1(r'([^?]*)', url)
html = get_html(url, faker=True)
cont = get_content(url, headers=fake_headers)
vid = r1(r'instagram.com/\w+/([^/]+)', url)
description = r1(r'<meta property="og:title" content="([^"]*)"', html) or \
r1(r'<title>\s([^<]*)</title>', html) # with logged-in cookies
description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \
r1(r'<title>\s([^<]*)</title>', cont) # with logged-in cookies
title = "{} [{}]".format(description.replace("\n", " "), vid)
stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
stream = r1(r'<meta property="og:video" content="([^"]*)"', cont)
if stream:
_, ext, size = url_info(stream)
@@ -21,14 +21,14 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
if not info_only:
download_urls([stream], title, ext, size, output_dir, merge=merge)
else:
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html)
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', cont)
try:
info = json.loads(data.group(1))
post = info['entry_data']['PostPage'][0]
assert post
except:
# with logged-in cookies
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html)
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont)
if data is not None:
log.e('[Warning] Cookies needed.')
post = json.loads(data.group(1))

View File

@@ -27,6 +27,9 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
api_url = video_url + '/api/video/' + video_hash
content = get_content(api_url, headers=headers)
data = json.loads(content)
if len(data) < 1:
print('Maybe a private video? [' + title + ']')
return True
down_urls = 'https:' + data[0]['uri']
type, ext, size = url_info(down_urls, headers=headers)
print_info(site_info, title+data[0]['resolution'], type, size)
@@ -35,10 +38,8 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)
def download_playlist_by_url( url, **kwargs):
video_page = get_content(url)
# url_first=re.findall(r"(http[s]?://[^/]+)",url)
video_page = get_html(url)
url_first=match1(url, r"(http[s]?://[^/]+)")
# print (url_first)
videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page))
if(len(videos)>0):
for video in videos:

View File

@@ -19,7 +19,7 @@ fake_headers_mobile = {
def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
'''Source: Android mobile'''
page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
page_url = 'https://video.weibo.com/show?fid=' + fid + '&type=mp4'
mobile_page = get_content(page_url, headers=fake_headers_mobile)
url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W')
@@ -78,6 +78,51 @@ def miaopai_download_story(url, output_dir='.', merge=False, info_only=False, **
download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs)
def miaopai_download_h5api(url, output_dir='.', merge=False, info_only=False, **kwargs):
oid = match1(url, r'/show/(\d{4}:\w+)')
page = "/show/%s" % oid
data_url = 'https://h5.video.weibo.com/api/component?%s' % parse.urlencode({
'page': page
})
headers = {}
headers.update(fake_headers_mobile)
headers['origin'] = 'https://h5.video.weibo.com'
headers['page-referer'] = page
headers['referer'] = 'https://h5.video.weibo.com/show/%s' % oid
post_data = {
"data": json.dumps({
"Component_Play_Playinfo": {"oid": oid}
})
}
data_content = post_content(data_url, headers=headers, post_data=post_data)
data = json.loads(data_content)
if data['msg'] != 'succ':
raise Exception('Weibo API returned non-success: ({}) {}'.format(data['code'], data['msg']))
play_info = data['data']['Component_Play_Playinfo']
title = play_info['title']
# get video formats and sort by size desc
video_formats = []
for fmt, relative_uri in play_info['urls'].items():
url = "https:%s" % relative_uri
type, ext, size = url_info(url, headers=headers)
video_formats.append({
'fmt': fmt,
'url': url,
'type': type,
'ext': ext,
'size': size,
})
video_formats.sort(key=lambda v:v['size'], reverse=True)
selected_video = video_formats[0]
video_url, ext, size = selected_video['url'], selected_video['ext'], selected_video['size']
print_info(site_info, title, ext, size)
if not info_only:
download_urls([video_url], fs.legitimize(title), ext, total_size=size, headers=headers, **kwargs)
def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, **kwargs):
mobile_page = get_content(url, headers=fake_headers_mobile)
try:
@@ -108,12 +153,16 @@ def miaopai_download(url, output_dir='.', merge=False, info_only=False, **kwargs
if re.match(r'^http[s]://.*\.weibo\.com/tv/v/(\w+)', url):
return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
if re.match(r'^http[s]://(.+\.)?weibo\.com/(tv/)?show/(\d{4}:\w+)', url):
return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
fid = match1(url, r'\?fid=(\d{4}:\w+)')
if fid is not None:
miaopai_download_by_fid(fid, output_dir, merge, info_only)
elif '/p/230444' in url:
fid = match1(url, r'/p/230444(\w+)')
miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only)
pass
else:
mobile_page = get_content(url, headers = fake_headers_mobile)
hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page)

View File

@@ -75,17 +75,13 @@ class _Dispatcher(object):
raise _NoMatchException()
missevan_stream_types = [
{'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl',
'resource_url_fmt': 'sound/{resource_url}'},
{'id': '320', 'quality': '320 Kbps', 'url_json_key': 'soundurl_64'},
{'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl'},
{'id': '128', 'quality': '128 Kbps', 'url_json_key': 'soundurl_128'},
{'id': '32', 'quality': '32 Kbps', 'url_json_key': 'soundurl_32'},
{'id': 'covers', 'desc': '封面图', 'url_json_key': 'cover_image',
'default_src': 'covers/nocover.png',
'resource_url_fmt': 'covers/{resource_url}'},
{'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'cover_image',
'default_src': 'coversmini/nocover.png',
'resource_url_fmt': 'coversmini/{resource_url}'}
{'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'front_cover',
'default_src': 'coversmini/nocover.png'}
]
def _get_resource_uri(data, stream_type):
@@ -353,7 +349,7 @@ class MissEvan(VideoExtractor):
@staticmethod
def url_resource(uri):
return 'https://static.missevan.com/' + uri
return uri if re.match(r'^https?:/{2}\w.+$', uri) else 'https://static.missevan.com/' + uri
site = MissEvan()
site_info = 'MissEvan.com'

View File

@@ -28,7 +28,7 @@ def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
#
# rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf
#
# because rtmpdump is unstable,may try serveral times
# because rtmpdump is unstable,may try several times
#
if not info_only:
# import pdb

View File

@@ -10,7 +10,7 @@ __all__ = ['qingting_download_by_url']
class Qingting(VideoExtractor):
# every resource is described by its channel id and program id
# so vid is tuple (chaanel_id, program_id)
# so vid is tuple (channel_id, program_id)
name = 'Qingting'
stream_types = [

View File

@@ -35,6 +35,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
part_urls= []
total_size = 0
ext = None
for part in range(1, seg_cnt+1):
if fc_cnt == 0:
# fix json parsing error

View File

@@ -70,12 +70,13 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
'[-_][6-9]\d\dx1\d\d\d\.jpe?g',
'[-_][6-9]\d\dx[6-9]\d\d\.jpe?g',
's1600/[\w%]+\.jpe?g', # blogger
'blogger\.googleusercontent\.com/img/a/\w*', # blogger
'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon?
]
urls = []
for i in media_exts:
urls += re.findall(r'(https?://[^ ;&"\'\\<>]+' + i + r'[^ ;&"\'\\<>]*)', page)
urls += re.findall(r'(https?://[^ ;&"\'\\<>]*' + i + r'[^ ;&"\'\\<>]*)', page)
p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page)
urls += [parse.unquote(url) for url in p_urls]
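
The change from '+' to '*' matters when the matched fragment begins immediately after the scheme, as in the new blogger.googleusercontent.com pattern. A self-contained demonstration (the URL is invented):

import re

page = 'src="https://blogger.googleusercontent.com/img/a/AVvXsEg123"'
ext = r'blogger\.googleusercontent\.com/img/a/\w*'
old = re.findall(r'(https?://[^ ;&"\'\\<>]+' + ext + r'[^ ;&"\'\\<>]*)', page)
new = re.findall(r'(https?://[^ ;&"\'\\<>]*' + ext + r'[^ ;&"\'\\<>]*)', page)
# old == []  -- '+' demands at least one character between '://' and the pattern
# new == ['https://blogger.googleusercontent.com/img/a/AVvXsEg123']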

View File

@@ -76,11 +76,14 @@ class YouTube(VideoExtractor):
# - https://www.youtube.com/yts/jsbin/player_ias-vfl_RGK2l/en_US/base.js
# - https://www.youtube.com/yts/jsbin/player-vflRjqq_w/da_DK/base.js
# - https://www.youtube.com/yts/jsbin/player_ias-vfl-jbnrr/da_DK/base.js
# - https://www.youtube.com/s/player/0b643cd1/player_ias.vflset/sv_SE/base.js
# - https://www.youtube.com/s/player/50e823fc/player_ias.vflset/sv_SE/base.js
def tr_js(code):
code = re.sub(r'function', r'def', code)
code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code)
# add prefix '_sig_' to prevent namespace pollution
code = re.sub(r'(\W)([$\w][$\w][$\w]?)\(', r'\1_sig_\2(', code)
code = re.sub(r'\$', '_dollar', code)
code = re.sub(r'\{', r':\n\t', code)
code = re.sub(r'\{', r': ', code)
code = re.sub(r'\}', r'\n', code)
code = re.sub(r'var\s+', r'', code)
code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
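
For intuition about tr_js, a partial, hedged demo of just the structural substitutions on an invented base.js-style cipher function; the full tr_js above also prefixes names with '_sig_' and escapes Python keywords:

import re

js = 'var xy=function(a){a=a.split("");a.reverse();return a.join("")}'
code = re.sub(r'function', r'def', js)
code = re.sub(r'\{', r': ', code)
code = re.sub(r'\}', r'\n', code)
code = re.sub(r'var\s+', r'', code)
code = re.sub(r'(\w+)\.join\(""\)', r'"".join(\1)', code)
print(code)  # xy=def(a): a=a.split("");a.reverse();return "".join(a)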
@@ -99,7 +102,7 @@ class YouTube(VideoExtractor):
f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
f1def = 'function main_%s%s' % (f1, f1def) # prefix to avoid potential namespace conflict
f1def = 'function %s%s' % (f1, f1def)
code = tr_js(f1def)
f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def))
for f2 in f2s:
@@ -112,13 +115,13 @@ class YouTube(VideoExtractor):
f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2))
f2 = re.sub(r'(as|if|in|is|or)', r'_\1', f2)
f2 = re.sub(r'\$', '_dollar', f2)
code = code + 'global %s\n' % f2 + tr_js(f2def)
code = code + 'global _sig_%s\n' % f2 + tr_js(f2def)
f1 = re.sub(r'(as|if|in|is|or)', r'_\1', f1)
f1 = re.sub(r'\$', '_dollar', f1)
code = code + 'sig=main_%s(s)' % f1 # prefix to avoid potential namespace conflict
code = code + '_sig=_sig_%s(s)' % f1
exec(code, globals(), locals())
return locals()['sig']
return locals()['_sig']
def chunk_by_range(url, size):
urls = []
@@ -195,8 +198,9 @@
# Get video info
# 'eurl' is a magic parameter that can bypass age restriction
# full form: 'eurl=https%3A%2F%2Fyoutube.googleapis.com%2Fv%2F{VIDEO_ID}'
video_info = parse.parse_qs(get_content('https://www.youtube.com/get_video_info?video_id={}&eurl=https%3A%2F%2Fy'.format(self.vid)))
logging.debug('STATUS: %s' % video_info['status'][0])
#video_info = parse.parse_qs(get_content('https://www.youtube.com/get_video_info?video_id={}&eurl=https%3A%2F%2Fy'.format(self.vid)))
#logging.debug('STATUS: %s' % video_info['status'][0])
video_info = {'status': ['ok'], 'use_cipher_signature': 'True'}
ytplayer_config = None
if 'status' not in video_info:
@@ -253,11 +257,16 @@
else:
# Parse video page instead
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
self.title = json.loads(ytplayer_config["args"]["player_response"])["videoDetails"]["title"]
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
self.title = ytInitialPlayerResponse["videoDetails"]["title"]
if re.search('([^"]*/base\.js)"', video_page):
self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
else:
self.html5player = None
stream_list = ytInitialPlayerResponse['streamingData']['formats']
elif video_info['status'] == ['fail']:
logging.debug('ERRORCODE: %s' % video_info['errorcode'][0])
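
A self-contained sketch of the new ytInitialPlayerResponse parsing path (the page fragment is fabricated; real watch pages embed far larger JSON):

import json
import re

video_page = 'var ytInitialPlayerResponse = {"videoDetails":{"title":"demo"},"streamingData":{"formats":[]}};\n'
m = re.search(r'ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page)
player_response = json.loads(m.group(1))
title = player_response['videoDetails']['title']       # 'demo'
formats = player_response['streamingData']['formats']  # []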

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
__version__ = '0.4.1500'
__version__ = '0.4.1545'

View File

@@ -41,6 +41,7 @@ class YouGetTests(unittest.TestCase):
#def test_acfun(self):
# acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
#def test_bilibili(self):
# bilibili.download(
# "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True
@@ -49,11 +50,13 @@ class YouGetTests(unittest.TestCase):
# "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
# )
# def test_soundcloud(self):
#def test_soundcloud(self):
## single song
# soundcloud.download(
# 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True
# )
#soundcloud.download(
# 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True
#)
## playlist
#soundcloud.download(
# 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True