diff --git a/.travis.yml b/.travis.yml
index 8433fe75..eedbeeb2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,15 +4,10 @@ python:
- "3.4"
- "3.5"
- "3.6"
+ - "3.7"
+ - "3.8"
+ #- "nightly" (flake8 not working in python 3.9 yet, module 'ast' has no attribute 'AugLoad')
- "pypy3"
-matrix:
- include:
- - python: "3.7"
- dist: xenial
- - python: "3.8-dev"
- dist: xenial
- - python: "nightly"
- dist: xenial
before_install:
- pip install flake8
before_script:
diff --git a/LICENSE.txt b/LICENSE.txt
index 5964bf20..a193d8e2 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,6 +1,7 @@
MIT License
-Copyright (c) 2012-2019 Mort Yao
+Copyright (c) 2012-2020 Mort Yao and other contributors
+ (https://github.com/soimort/you-get/graphs/contributors)
Copyright (c) 2012 Boyu Guo
Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/README.md b/README.md
index 0735bd8a..3429f9d8 100644
--- a/README.md
+++ b/README.md
@@ -368,15 +368,12 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| VK | |✓|✓| |
| Vine | |✓| | |
| Vimeo | |✓| | |
-| Vidto | |✓| | |
-| Videomega | |✓| | |
| Veoh | |✓| | |
| **Tumblr** | |✓|✓|✓|
| TED | |✓| | |
| SoundCloud | | | |✓|
| SHOWROOM | |✓| | |
| Pinterest | | |✓| |
-| MusicPlayOn | |✓| | |
| MTV81 | |✓| | |
| Mixcloud | | | |✓|
| Metacafe | |✓| | |
@@ -387,7 +384,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| InfoQ | |✓| | |
| Imgur | | |✓| |
| Heavy Music Archive | | | |✓|
-| **Google+** | |✓|✓| |
| Freesound | | | |✓|
| Flickr | |✓|✓| |
| FC2 Video | |✓| | |
@@ -406,10 +402,9 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| **AcFun** | |✓| | |
| **Baidu
百度贴吧** | |✓|✓| |
| 爆米花网 | |✓| | |
-| **bilibili
哔哩哔哩** | |✓| | |
+| **bilibili
哔哩哔哩** | |✓|✓|✓|
| 豆瓣 | |✓| |✓|
| 斗鱼 | |✓| | |
-| Panda
熊猫 | |✓| | |
| 凤凰视频 | |✓| | |
| 风行网 | |✓| | |
| iQIYI
爱奇艺 | |✓| | |
@@ -441,6 +436,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 火猫TV | |✓| | |
| 阳光宽频网 | |✓| | |
| 西瓜视频 | |✓| | |
+| 新片场 | |✓| | |
| 快手 | |✓|✓| |
| 抖音 | |✓| | |
| TikTok | |✓| | |
diff --git a/setup.py b/setup.py
index 21246c5f..24dc9fb2 100755
--- a/setup.py
+++ b/setup.py
@@ -41,5 +41,9 @@ setup(
classifiers = proj_info['classifiers'],
- entry_points = {'console_scripts': proj_info['console_scripts']}
+ entry_points = {'console_scripts': proj_info['console_scripts']},
+
+ extras_require={
+ 'socks': ['PySocks'],
+ }
)
diff --git a/src/you_get/common.py b/src/you_get/common.py
index b8302e01..ef52a07a 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -66,6 +66,7 @@ SITES = {
'iwara' : 'iwara',
'joy' : 'joy',
'kankanews' : 'bilibili',
+ 'kakao' : 'kakao',
'khanacademy' : 'khan',
'ku6' : 'ku6',
'kuaishou' : 'kuaishou',
@@ -82,7 +83,6 @@ SITES = {
'missevan' : 'missevan',
'mixcloud' : 'mixcloud',
'mtv81' : 'mtv81',
- 'musicplayon' : 'musicplayon',
'miaopai' : 'yixia',
'naver' : 'naver',
'7gogo' : 'nanagogo',
@@ -106,8 +106,6 @@ SITES = {
'twimg' : 'twitter',
'twitter' : 'twitter',
'ucas' : 'ucas',
- 'videomega' : 'videomega',
- 'vidto' : 'vidto',
'vimeo' : 'vimeo',
'wanmen' : 'wanmen',
'weibo' : 'miaopai',
@@ -118,6 +116,7 @@ SITES = {
'xiaokaxiu' : 'yixia',
'xiaojiadianvideo' : 'fc2video',
'ximalaya' : 'ximalaya',
+ 'xinpianchang' : 'xinpianchang',
'yinyuetai' : 'yinyuetai',
'yizhibo' : 'yizhibo',
'youku' : 'youku',
@@ -280,15 +279,21 @@ def matchall(text, patterns):
def launch_player(player, urls):
import subprocess
import shlex
+ urls = list(urls)
+ for url in urls.copy():
+ if type(url) is list:
+ urls.extend(url)
+ urls = [url for url in urls if type(url) is str]
+ assert urls
if (sys.version_info >= (3, 3)):
import shutil
exefile=shlex.split(player)[0]
if shutil.which(exefile) is not None:
- subprocess.call(shlex.split(player) + list(urls))
+ subprocess.call(shlex.split(player) + urls)
else:
log.wtf('[Failed] Cannot find player "%s"' % exefile)
else:
- subprocess.call(shlex.split(player) + list(urls))
+ subprocess.call(shlex.split(player) + urls)
def parse_query_param(url, param):
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 2961f015..4280d236 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -33,7 +33,10 @@ from .interest import *
from .iqilu import *
from .iqiyi import *
from .joy import *
+from .khan import *
from .ku6 import *
+from .kakao import *
+from .kuaishou import *
from .kugou import *
from .kuwo import *
from .le import *
@@ -46,7 +49,6 @@ from .miaopai import *
from .miomio import *
from .mixcloud import *
from .mtv81 import *
-from .musicplayon import *
from .nanagogo import *
from .naver import *
from .netease import *
@@ -62,6 +64,7 @@ from .sina import *
from .sohu import *
from .soundcloud import *
from .suntv import *
+from .ted import *
from .theplatform import *
from .tiktok import *
from .tucao import *
@@ -70,20 +73,17 @@ from .tumblr import *
from .twitter import *
from .ucas import *
from .veoh import *
-from .videomega import *
from .vimeo import *
from .vine import *
from .vk import *
from .w56 import *
from .wanmen import *
from .xiami import *
+from .xinpianchang import *
from .yinyuetai import *
from .yixia import *
from .youku import *
from .youtube import *
-from .ted import *
-from .khan import *
from .zhanqi import *
-from .kuaishou import *
from .zhibo import *
-from .zhihu import *
+from .zhihu import *
\ No newline at end of file
diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py
index 52fcb4f9..6bf5964a 100644
--- a/src/you_get/extractors/acfun.py
+++ b/src/you_get/extractors/acfun.py
@@ -121,9 +121,17 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
video_list = json_data.get('videoList')
if len(video_list) > 1:
title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
-
- m3u8_url = json_data.get('currentVideoInfo')['playInfos'][0]['playUrls'][0]
-
+ currentVideoInfo = json_data.get('currentVideoInfo')
+ if 'playInfos' in currentVideoInfo:
+ m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0]
+ elif 'ksPlayJson' in currentVideoInfo:
+ ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
+ representation = ksPlayJson.get('adaptationSet').get('representation')
+ reps = []
+ for one in representation:
+ reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) )
+ m3u8_url = max(reps)[1]
+
elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
html = get_content(url, headers=fake_headers)
tag_script = match1(html, r'')
diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py
index 7914667e..521d5e99 100644
--- a/src/you_get/extractors/baidu.py
+++ b/src/you_get/extractors/baidu.py
@@ -112,15 +112,15 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
time.sleep(5)
download_urls([real_url], title, ext, size,
output_dir, url, merge=merge, faker=True)
- elif re.match(r'http://music.baidu.com/album/\d+', url):
- id = r1(r'http://music.baidu.com/album/(\d+)', url)
+ elif re.match(r'https?://music.baidu.com/album/\d+', url):
+ id = r1(r'https?://music.baidu.com/album/(\d+)', url)
baidu_download_album(id, output_dir, merge, info_only)
- elif re.match('http://music.baidu.com/song/\d+', url):
- id = r1(r'http://music.baidu.com/song/(\d+)', url)
+ elif re.match('https?://music.baidu.com/song/\d+', url):
+ id = r1(r'https?://music.baidu.com/song/(\d+)', url)
baidu_download_song(id, output_dir, merge, info_only)
- elif re.match('http://tieba.baidu.com/', url):
+ elif re.match('https?://tieba.baidu.com/', url):
try:
# embedded videos
embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
@@ -140,8 +140,8 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
output_dir=output_dir, merge=False)
items = re.findall(
- r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
- urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
+ r'//tiebapic.baidu.com/forum/w[^"]+/([^/"]+)', html)
+ urls = ['http://tiebapic.baidu.com/forum/pic/item/' + i
for i in set(items)]
# handle albums
@@ -151,7 +151,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
album_info = json.loads(get_content(album_url))
for i in album_info['data']['pic_list']:
urls.append(
- 'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
+ 'http://tiebapic.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
ext = 'jpg'
size = float('Inf')
diff --git a/src/you_get/extractors/baomihua.py b/src/you_get/extractors/baomihua.py
index 99dd7132..9e97879a 100644
--- a/src/you_get/extractors/baomihua.py
+++ b/src/you_get/extractors/baomihua.py
@@ -6,6 +6,16 @@ from ..common import *
import urllib
+def baomihua_headers(referer=None, cookie=None):
+ # a reasonable UA
+ ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
+ headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+ if referer is not None:
+ headers.update({'Referer': referer})
+ if cookie is not None:
+ headers.update({'Cookie': cookie})
+ return headers
+
def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id)
host = r1(r'host=([^&]*)', html)
@@ -16,10 +26,10 @@ def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_onl
assert vid
dir_str = r1(r'&dir=([^&]*)', html).strip()
url = "http://%s/%s/%s.%s" % (host, dir_str, vid, type)
- _, ext, size = url_info(url)
+ _, ext, size = url_info(url, headers=baomihua_headers())
print_info(site_info, title, type, size)
if not info_only:
- download_urls([url], title, ext, size, output_dir, merge = merge)
+ download_urls([url], title, ext, size, output_dir, merge = merge, headers=baomihua_headers())
def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 5dd55fc2..e1c447e9 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -28,18 +28,22 @@ class Bilibili(VideoExtractor):
'container': 'FLV', 'video_resolution': '360p', 'desc': '流畅 360P'},
# 'quality': 15?
{'id': 'mp4', 'quality': 0},
+
+ {'id': 'jpg', 'quality': 0},
]
@staticmethod
- def height_to_quality(height):
- if height <= 360:
+ def height_to_quality(height, qn):
+ if height <= 360 and qn <= 16:
return 16
- elif height <= 480:
+ elif height <= 480 and qn <= 32:
return 32
- elif height <= 720:
+ elif height <= 720 and qn <= 64:
return 64
- else:
+ elif height <= 1080 and qn <= 80:
return 80
+ else:
+ return 112
@staticmethod
def bilibili_headers(referer=None, cookie=None):
@@ -102,8 +106,8 @@ class Bilibili(VideoExtractor):
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
@staticmethod
- def bilibili_space_favlist_api(vmid, fid, pn=1, ps=100):
- return 'https://api.bilibili.com/x/space/fav/arc?vmid=%s&fid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (vmid, fid, pn, ps)
+ def bilibili_space_favlist_api(fid, pn=1, ps=20):
+ return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
@staticmethod
def bilibili_space_video_api(mid, pn=1, ps=100):
@@ -113,6 +117,10 @@ class Bilibili(VideoExtractor):
def bilibili_vc_api(video_id):
return 'https://api.vc.bilibili.com/clip/v1/video/detail?video_id=%s' % video_id
+ @staticmethod
+ def bilibili_h_api(doc_id):
+ return 'https://api.vc.bilibili.com/link_draw/v1/doc/detail?doc_id=%s' % doc_id
+
@staticmethod
def url_size(url, faker=False, headers={},err_value=0):
try:
@@ -131,10 +139,10 @@ class Bilibili(VideoExtractor):
# r'
')
title = title.strip() if title else vid
@@ -152,11 +156,11 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
title = vid
elif 'view.inews.qq.com' in url:
# view.inews.qq.com/a/20180521V0Z9MH00
- content = get_content(url)
+ content = get_content(url, headers)
vid = match1(content, r'"vid":"(\w+)"')
title = match1(content, r'"title":"(\w+)"')
else:
- content = get_content(url)
+ content = get_content(url, headers)
#vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
rurl = match1(content, r'') #https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html
vid = ""
diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py
index a1afc126..74374202 100644
--- a/src/you_get/extractors/sohu.py
+++ b/src/you_get/extractors/sohu.py
@@ -26,7 +26,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_pr
vid = r1('id=(\d+)', url)
else:
html = get_html(url)
- vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
+ vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
assert vid
if extractor_proxy:
diff --git a/src/you_get/extractors/videomega.py b/src/you_get/extractors/videomega.py
deleted file mode 100644
index 34fb5205..00000000
--- a/src/you_get/extractors/videomega.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['videomega_download']
-
-from ..common import *
-import ssl
-
-def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
- # Hot-plug cookie handler
- ssl_context = request.HTTPSHandler(
- context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
- cookie_handler = request.HTTPCookieProcessor()
- opener = request.build_opener(ssl_context, cookie_handler)
- opener.addheaders = [('Referer', url),
- ('Cookie', 'noadvtday=0')]
- request.install_opener(opener)
-
- if re.search(r'view\.php', url):
- php_url = url
- else:
- content = get_content(url)
- m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
- ref = m.group(1)
- width, height = m.group(2), m.group(3)
- php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height)
- content = get_content(php_url)
-
- title = match1(content, r'(.*)')
- js = match1(content, r'(eval.*)')
- t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
- t = re.sub(r'(\w)', r'{\1}', t)
- t = t.translate({87 + i: str(i) for i in range(10, 36)})
- s = match1(js, r"'([^']+)'\.split").split('|')
- src = t.format(*s)
-
- type, ext, size = url_info(src, faker=True)
-
- print_info(site_info, title, type, size)
- if not info_only:
- download_urls([src], title, ext, size, output_dir, merge=merge, faker=True)
-
-site_info = "Videomega.tv"
-download = videomega_download
-download_playlist = playlist_not_supported('videomega')
diff --git a/src/you_get/extractors/vidto.py b/src/you_get/extractors/vidto.py
deleted file mode 100644
index c4e3b87e..00000000
--- a/src/you_get/extractors/vidto.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['vidto_download']
-
-from ..common import *
-import pdb
-import time
-
-
-def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
- html = get_content(url)
- params = {}
- r = re.findall(
- r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html)
- for name, value in r:
- params[name] = value
- data = parse.urlencode(params).encode('utf-8')
- req = request.Request(url)
- print("Please wait for 6 seconds...")
- time.sleep(6)
- print("Starting")
- new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
- new_stff = re.search('lnk_download" href="(.*?)">', new_html)
- if(new_stff):
- url = new_stff.group(1)
- title = params['fname']
- type = ""
- ext = ""
- a, b, size = url_info(url)
- print_info(site_info, title, type, size)
- if not info_only:
- download_urls([url], title, ext, size, output_dir, merge=merge)
- else:
- print("cannot find link, please review")
- pdb.set_trace()
-
-
-site_info = "vidto.me"
-download = vidto_download
-download_playlist = playlist_not_supported('vidto')
diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py
new file mode 100644
index 00000000..fac3d01f
--- /dev/null
+++ b/src/you_get/extractors/xinpianchang.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+import re
+import json
+from ..extractor import VideoExtractor
+from ..common import get_content, playlist_not_supported
+
+
+class Xinpianchang(VideoExtractor):
+ name = 'xinpianchang'
+ stream_types = [
+ {'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'},
+ {'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'},
+ {'id': '1080', 'quality': '高清 1080P', 'video_profile': 'mp4-FHD'},
+ {'id': '720', 'quality': '高清 720P', 'video_profile': 'mp4-HD'},
+ {'id': '540', 'quality': '清晰 540P', 'video_profile': 'mp4-SD'},
+ {'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'}
+ ]
+
+ def prepare(self, **kwargs):
+ # find key
+ page_content = get_content(self.url)
+ match_rule = r"vid: \"(.+?)\","
+ key = re.findall(match_rule, page_content)[0]
+
+ # get videos info
+ video_url = 'https://openapi-vtom.vmovier.com/v3/video/' + key + '?expand=resource'
+ data = json.loads(get_content(video_url))
+ self.title = data["data"]["video"]["title"]
+ video_info = data["data"]["resource"]["progressive"]
+
+ # set streams dict
+ for video in video_info:
+ url = video["https_url"]
+ size = video["filesize"]
+ profile = video["profile_code"]
+ stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0]
+
+ stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality'])
+ self.streams[stype['id']] = stream_data
+
+
+download = Xinpianchang().download_by_url
+download_playlist = playlist_not_supported('xinpianchang')
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index 4483f8eb..38aa1a4e 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -94,7 +94,8 @@ class YouTube(VideoExtractor):
f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \
match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \
match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \
- match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
+ match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') or \
+ match1(js, r'=([$\w]+)\(decodeURIComponent\(')
f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
@@ -217,10 +218,16 @@ class YouTube(VideoExtractor):
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
# Workaround: get_video_info returns bad s. Why?
- stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
+ if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']:
+ stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats']
+ else:
+ stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
#stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
except:
- stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
+ if 'url_encoded_fmt_stream_map' not in video_info:
+ stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
+ else:
+ stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
if re.search('([^"]*/base\.js)"', video_page):
self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
else:
@@ -302,19 +309,35 @@ class YouTube(VideoExtractor):
exit(0)
for stream in stream_list:
- metadata = parse.parse_qs(stream)
- stream_itag = metadata['itag'][0]
- self.streams[stream_itag] = {
- 'itag': metadata['itag'][0],
- 'url': metadata['url'][0],
- 'sig': metadata['sig'][0] if 'sig' in metadata else None,
- 's': metadata['s'][0] if 's' in metadata else None,
- 'quality': metadata['quality'][0] if 'quality' in metadata else None,
- #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
- 'type': metadata['type'][0],
- 'mime': metadata['type'][0].split(';')[0],
- 'container': mime_to_container(metadata['type'][0].split(';')[0]),
- }
+ if isinstance(stream, str):
+ metadata = parse.parse_qs(stream)
+ stream_itag = metadata['itag'][0]
+ self.streams[stream_itag] = {
+ 'itag': metadata['itag'][0],
+ 'url': metadata['url'][0],
+ 'sig': metadata['sig'][0] if 'sig' in metadata else None,
+ 's': metadata['s'][0] if 's' in metadata else None,
+ 'quality': metadata['quality'][0] if 'quality' in metadata else None,
+ #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
+ 'type': metadata['type'][0],
+ 'mime': metadata['type'][0].split(';')[0],
+ 'container': mime_to_container(metadata['type'][0].split(';')[0]),
+ }
+ else:
+ stream_itag = str(stream['itag'])
+ self.streams[stream_itag] = {
+ 'itag': str(stream['itag']),
+ 'url': stream['url'] if 'url' in stream else None,
+ 'sig': None,
+ 's': None,
+ 'quality': stream['quality'],
+ 'type': stream['mimeType'],
+ 'mime': stream['mimeType'].split(';')[0],
+ 'container': mime_to_container(stream['mimeType'].split(';')[0]),
+ }
+ if 'cipher' in stream:
+ self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+ for _ in stream['cipher'].split('&')]))
# Prepare caption tracks
try:
@@ -347,7 +370,7 @@ class YouTube(VideoExtractor):
self.caption_tracks[lang] = srt
except: pass
- # Prepare DASH streams
+ # Prepare DASH streams (NOTE: not every video has DASH streams!)
try:
dashmpd = ytplayer_config['args']['dashmpd']
dash_xml = parseString(get_content(dashmpd))
@@ -425,10 +448,43 @@ class YouTube(VideoExtractor):
for i in afmt.split('&')])
for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
except:
- streams = [dict([(i.split('=')[0],
- parse.unquote(i.split('=')[1]))
- for i in afmt.split('&')])
- for afmt in video_info['adaptive_fmts'][0].split(',')]
+ if 'adaptive_fmts' in video_info:
+ streams = [dict([(i.split('=')[0],
+ parse.unquote(i.split('=')[1]))
+ for i in afmt.split('&')])
+ for afmt in video_info['adaptive_fmts'][0].split(',')]
+ else:
+ try:
+ streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+ except: # no DASH stream at all
+ return
+ # streams without contentLength got broken urls, just remove them (#2767)
+ streams = [stream for stream in streams if 'contentLength' in stream]
+ for stream in streams:
+ stream['itag'] = str(stream['itag'])
+ if 'qualityLabel' in stream:
+ stream['quality_label'] = stream['qualityLabel']
+ del stream['qualityLabel']
+ if 'width' in stream:
+ stream['size'] = '{}x{}'.format(stream['width'], stream['height'])
+ del stream['width']
+ del stream['height']
+ stream['type'] = stream['mimeType']
+ stream['clen'] = stream['contentLength']
+ stream['init'] = '{}-{}'.format(
+ stream['initRange']['start'],
+ stream['initRange']['end'])
+ stream['index'] = '{}-{}'.format(
+ stream['indexRange']['start'],
+ stream['indexRange']['end'])
+ del stream['mimeType']
+ del stream['contentLength']
+ del stream['initRange']
+ del stream['indexRange']
+ if 'cipher' in stream:
+ stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+ for _ in stream['cipher'].split('&')]))
+ del stream['cipher']
for stream in streams: # get over speed limiting
stream['url'] += '&ratebypass=yes'
diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py
index 5971bd93..c6195761 100644
--- a/src/you_get/json_output.py
+++ b/src/you_get/json_output.py
@@ -29,7 +29,7 @@ def output(video_extractor, pretty_print=True):
if extra:
out["extra"] = extra
if pretty_print:
- print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
+ print(json.dumps(out, indent=4, ensure_ascii=False))
else:
print(json.dumps(out))
diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py
index 67b26b78..81fd1bf5 100644
--- a/src/you_get/util/log.py
+++ b/src/you_get/util/log.py
@@ -99,6 +99,4 @@ def wtf(message, exit_code=1):
def yes_or_no(message):
ans = str(input('%s (y/N) ' % message)).lower().strip()
- if ans == 'y':
- return True
- return False
+ return ans == 'y'
diff --git a/src/you_get/version.py b/src/you_get/version.py
index 1d87177c..d5004187 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.1355'
+__version__ = '0.4.1432'
diff --git a/tests/test.py b/tests/test.py
index e2f77a79..6fd3db6c 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -6,14 +6,15 @@ from you_get.extractors import (
imgur,
magisto,
youtube,
- missevan
+ missevan,
+ acfun,
+ bilibili
)
class YouGetTests(unittest.TestCase):
def test_imgur(self):
imgur.download('http://imgur.com/WVLk5nD', info_only=True)
- imgur.download('http://imgur.com/gallery/WVLk5nD', info_only=True)
def test_magisto(self):
magisto.download(
@@ -21,13 +22,6 @@ class YouGetTests(unittest.TestCase):
info_only=True
)
- def test_missevan(self):
- missevan.download('https://m.missevan.com/sound/1285995', info_only=True)
- missevan.download_playlist(
- 'https://www.missevan.com/mdrama/drama/24130', info_only=True)
- missevan.download_playlist(
- 'https://www.missevan.com/albuminfo/203090', info_only=True)
-
def test_youtube(self):
youtube.download(
'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
@@ -37,7 +31,19 @@ class YouGetTests(unittest.TestCase):
'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa
info_only=True
)
+ youtube.download(
+ 'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True
+ )
+ def test_acfun(self):
+ acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
+    def test_bilibili(self):
+ bilibili.download(
+ "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True
+ )
+ bilibili.download(
+ "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
+ )
if __name__ == '__main__':
unittest.main()
diff --git a/you-get.json b/you-get.json
index 56f8212a..e98e2e8a 100644
--- a/you-get.json
+++ b/you-get.json
@@ -18,14 +18,13 @@
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.0",
- "Programming Language :: Python :: 3.1",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
"Topic :: Internet",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Multimedia",