From 93fc74db2af9a6af02b574090e84c1c402099f50 Mon Sep 17 00:00:00 2001
From: cage <120989324@qq.com>
Date: Mon, 28 Oct 2019 01:39:55 +0800
Subject: [PATCH 01/51] Update bilibili.py
download multipart video from a given P number
---
src/you_get/extractors/bilibili.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index e5ddbafc..d0bbace3 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -624,7 +624,8 @@ class Bilibili(VideoExtractor):
html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)')
diff --git a/tests/test.py b/tests/test.py
index e2f77a79..16741722 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -6,7 +6,8 @@ from you_get.extractors import (
imgur,
magisto,
youtube,
- missevan
+ missevan,
+ acfun
)
@@ -38,6 +39,8 @@ class YouGetTests(unittest.TestCase):
info_only=True
)
+ def test_acfun(self):
+ acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
if __name__ == '__main__':
unittest.main()
From 867b66effc3554273efcb3e756ae5e388a1caa5c Mon Sep 17 00:00:00 2001
From: SFMDI <36741818+SFMDI@users.noreply.github.com>
Date: Sat, 23 Nov 2019 03:05:21 +0900
Subject: [PATCH 06/51] fix extractor naver.py
can download both old and recent videos without wrong results
---
src/you_get/extractors/naver.py | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/src/you_get/extractors/naver.py b/src/you_get/extractors/naver.py
index add884e9..42a607e4 100644
--- a/src/you_get/extractors/naver.py
+++ b/src/you_get/extractors/naver.py
@@ -16,15 +16,8 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
page = get_content(url)
try:
- temp = re.search(r"", page)
- if temp is not None:
- og_video_url = temp.group(1)
- params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query)
- vid = params_dict['vid'][0]
- key = params_dict['outKey'][0]
- else:
- vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
- key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
+ vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
+ key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
meta_str = get_content(ep.format(vid, key))
meta_json = json.loads(meta_str)
if 'errorCode' in meta_json:
@@ -38,7 +31,7 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
size = url_size(video_url)
print_info(site_info, title, 'mp4', size)
if not info_only:
- download_urls([video_url], title, 'mp4', size, **kwargs)
+ download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
except:
universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
From 6c4fbd0651ad0b002864cf5f33ed3b0f28d59a53 Mon Sep 17 00:00:00 2001
From: SFMDI <36741818+SFMDI@users.noreply.github.com>
Date: Sat, 23 Nov 2019 03:53:32 +0900
Subject: [PATCH 07/51] add tv.kakao.com extractor
can download video from tv.kakao.com
and sort __init__ by extractor name
---
src/you_get/common.py | 1 +
src/you_get/extractors/__init__.py | 7 +++--
src/you_get/extractors/kakao.py | 50 ++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+), 3 deletions(-)
create mode 100644 src/you_get/extractors/kakao.py
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 2397a0a6..19474a75 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -66,6 +66,7 @@ SITES = {
'iwara' : 'iwara',
'joy' : 'joy',
'kankanews' : 'bilibili',
+ 'kakao' : 'kakao',
'khanacademy' : 'khan',
'ku6' : 'ku6',
'kuaishou' : 'kuaishou',
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 2961f015..5ed5264b 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -33,7 +33,10 @@ from .interest import *
from .iqilu import *
from .iqiyi import *
from .joy import *
+from .khan import *
from .ku6 import *
+from .kakao import *
+from .kuaishou import *
from .kugou import *
from .kuwo import *
from .le import *
@@ -62,6 +65,7 @@ from .sina import *
from .sohu import *
from .soundcloud import *
from .suntv import *
+from .ted import *
from .theplatform import *
from .tiktok import *
from .tucao import *
@@ -81,9 +85,6 @@ from .yinyuetai import *
from .yixia import *
from .youku import *
from .youtube import *
-from .ted import *
-from .khan import *
from .zhanqi import *
-from .kuaishou import *
from .zhibo import *
from .zhihu import *
diff --git a/src/you_get/extractors/kakao.py b/src/you_get/extractors/kakao.py
new file mode 100644
index 00000000..4ec282e3
--- /dev/null
+++ b/src/you_get/extractors/kakao.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+from ..common import *
+from .universal import *
+
+__all__ = ['kakao_download']
+
+
+def kakao_download(url, output_dir='.', info_only=False, **kwargs):
+ json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}'
+
+ # in this implementation playlist not supported so use url_without_playlist
+ # if want to support playlist need to change that
+ if re.search('playlistId', url):
+ url = re.search(r"(.+)\?.+?", url).group(1)
+
+ page = get_content(url)
+ try:
+ vid = re.search(r"", page).group(1)
+ title = re.search(r"", page).group(1)
+
+ meta_str = get_content(json_request_url.format(vid))
+ meta_json = json.loads(meta_str)
+
+ standard_preset = meta_json['output_list']['standard_preset']
+ output_videos = meta_json['output_list']['output_list']
+ size = ''
+ if meta_json['svcname'] == 'smr_pip':
+ for v in output_videos:
+ if v['preset'] == 'mp4_PIP_SMR_480P':
+ size = int(v['filesize'])
+ break
+ else:
+ for v in output_videos:
+ if v['preset'] == standard_preset:
+ size = int(v['filesize'])
+ break
+
+ video_url = meta_json['location']['url']
+
+ print_info(site_info, title, 'mp4', size)
+ if not info_only:
+ download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
+ except:
+ universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs)
+
+
+site_info = "tv.kakao.com"
+download = kakao_download
+download_playlist = playlist_not_supported('kakao')
From e9d5cc0232df0e1593d60aed7ae256c048178e0f Mon Sep 17 00:00:00 2001
From: Jerry
Date: Sun, 1 Dec 2019 23:49:34 +0800
Subject: [PATCH 08/51] Fix playback of VideoExtractor.dash_streams
---
src/you_get/common.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 2397a0a6..65063647 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -272,15 +272,21 @@ def matchall(text, patterns):
def launch_player(player, urls):
import subprocess
import shlex
+ urls = list(urls)
+ for url in urls.copy():
+ if type(url) is list:
+ urls.extend(url)
+ urls = [url for url in urls if type(url) is str]
+ assert urls
if (sys.version_info >= (3, 3)):
import shutil
exefile=shlex.split(player)[0]
if shutil.which(exefile) is not None:
- subprocess.call(shlex.split(player) + list(urls))
+ subprocess.call(shlex.split(player) + urls)
else:
log.wtf('[Failed] Cannot find player "%s"' % exefile)
else:
- subprocess.call(shlex.split(player) + list(urls))
+ subprocess.call(shlex.split(player) + urls)
def parse_query_param(url, param):
From 44698a0f39d6b6c5e5fd1e5a8efffe8de3278519 Mon Sep 17 00:00:00 2001
From: out001a <545827465@qq.com>
Date: Sun, 15 Dec 2019 12:29:20 +0800
Subject: [PATCH 09/51] [ixigua] fix error 'video_id not found'
---
src/you_get/extractors/ixigua.py | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py
index 20e45616..151107a6 100644
--- a/src/you_get/extractors/ixigua.py
+++ b/src/you_get/extractors/ixigua.py
@@ -5,6 +5,8 @@ import binascii
from ..common import *
import random
+import requests
+import string
import ctypes
from json import loads
@@ -80,7 +82,23 @@ def get_video_url_from_video_id(video_id):
def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
# example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
- html = get_html(url, faker=True)
+ sess = requests.session()
+ html = sess.get(url, headers=headers).text
+ conf = loads(match1(html, r"window\.config = (.+);"))
+ if not conf:
+ log.e("Get window.config from url failed, url: {}".format(url))
+ return
+ verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \
+ + '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31))
+ try:
+ ok = get_content(verify_url)
+ except Exception as e:
+ ok = e.msg
+ if ok != 'OK':
+ log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
+ return
+ html = sess.get(url, headers=headers).text
+
video_id = match1(html, r"\"vid\":\"([^\"]+)")
title = match1(html, r"\"player__videoTitle\">.*?(.*)<\/h1><\/div>")
if not video_id:
From a54a9b36260f84bcd6f7fda3017a54dc9b5330fd Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Wed, 25 Dec 2019 02:47:29 +0100
Subject: [PATCH 10/51] [baidu] fix tiebapic
---
src/you_get/extractors/baidu.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py
index 7914667e..77e666b3 100644
--- a/src/you_get/extractors/baidu.py
+++ b/src/you_get/extractors/baidu.py
@@ -140,8 +140,8 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
output_dir=output_dir, merge=False)
items = re.findall(
- r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
- urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
+ r'//tiebapic.baidu.com/forum/w[^"]+/([^/"]+)', html)
+ urls = ['http://tiebapic.baidu.com/forum/pic/item/' + i
for i in set(items)]
# handle albums
@@ -151,7 +151,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
album_info = json.loads(get_content(album_url))
for i in album_info['data']['pic_list']:
urls.append(
- 'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
+ 'http://tiebapic.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
ext = 'jpg'
size = float('Inf')
From 654371e851d0c8b2f403ee81d29e41d3bed52e2b Mon Sep 17 00:00:00 2001
From: chonpsk
Date: Fri, 27 Dec 2019 22:55:51 +0800
Subject: [PATCH 11/51] fix issue about KeyError: 'url_encoded_fmt_stream_map'
---
src/you_get/extractors/youtube.py | 82 ++++++++++++++++++++++++-------
1 file changed, 64 insertions(+), 18 deletions(-)
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index 4483f8eb..3606a3c6 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -220,7 +220,10 @@ class YouTube(VideoExtractor):
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
#stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
except:
- stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
+ if 'url_encoded_fmt_stream_map' not in video_info:
+ stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
+ else:
+ stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
if re.search('([^"]*/base\.js)"', video_page):
self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
else:
@@ -302,19 +305,35 @@ class YouTube(VideoExtractor):
exit(0)
for stream in stream_list:
- metadata = parse.parse_qs(stream)
- stream_itag = metadata['itag'][0]
- self.streams[stream_itag] = {
- 'itag': metadata['itag'][0],
- 'url': metadata['url'][0],
- 'sig': metadata['sig'][0] if 'sig' in metadata else None,
- 's': metadata['s'][0] if 's' in metadata else None,
- 'quality': metadata['quality'][0] if 'quality' in metadata else None,
- #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
- 'type': metadata['type'][0],
- 'mime': metadata['type'][0].split(';')[0],
- 'container': mime_to_container(metadata['type'][0].split(';')[0]),
- }
+ if isinstance(stream, str):
+ metadata = parse.parse_qs(stream)
+ stream_itag = metadata['itag'][0]
+ self.streams[stream_itag] = {
+ 'itag': metadata['itag'][0],
+ 'url': metadata['url'][0],
+ 'sig': metadata['sig'][0] if 'sig' in metadata else None,
+ 's': metadata['s'][0] if 's' in metadata else None,
+ 'quality': metadata['quality'][0] if 'quality' in metadata else None,
+ #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
+ 'type': metadata['type'][0],
+ 'mime': metadata['type'][0].split(';')[0],
+ 'container': mime_to_container(metadata['type'][0].split(';')[0]),
+ }
+ else:
+ stream_itag = stream['itag']
+ self.streams[stream_itag] = {
+ 'itag': stream['itag'],
+ 'url': stream['url'] if 'url' in stream else None,
+ 'sig': None,
+ 's': None,
+ 'quality': stream['quality'],
+ 'type': stream['mimeType'],
+ 'mime': stream['mimeType'].split(';')[0],
+ 'container': mime_to_container(stream['mimeType'].split(';')[0]),
+ }
+ if 'cipher' in stream:
+ self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+ for _ in stream['cipher'].split('&')]))
# Prepare caption tracks
try:
@@ -425,10 +444,37 @@ class YouTube(VideoExtractor):
for i in afmt.split('&')])
for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
except:
- streams = [dict([(i.split('=')[0],
- parse.unquote(i.split('=')[1]))
- for i in afmt.split('&')])
- for afmt in video_info['adaptive_fmts'][0].split(',')]
+ if 'adaptive_fmts' in video_info:
+ streams = [dict([(i.split('=')[0],
+ parse.unquote(i.split('=')[1]))
+ for i in afmt.split('&')])
+ for afmt in video_info['adaptive_fmts'][0].split(',')]
+ else:
+ streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+ for stream in streams:
+ if 'qualityLabel' in stream:
+ stream['quality_label'] = stream['qualityLabel']
+ del stream['qualityLabel']
+ if 'width' in stream:
+ stream['size'] = '{}x{}'.format(stream['width'], stream['height'])
+ del stream['width']
+ del stream['height']
+ stream['type'] = stream['mimeType']
+ stream['clen'] = stream['contentLength']
+ stream['init'] = '{}-{}'.format(
+ stream['initRange']['start'],
+ stream['initRange']['end'])
+ stream['index'] = '{}-{}'.format(
+ stream['indexRange']['start'],
+ stream['indexRange']['end'])
+ del stream['mimeType']
+ del stream['contentLength']
+ del stream['initRange']
+ del stream['indexRange']
+ if 'cipher' in stream:
+ stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+ for _ in stream['cipher'].split('&')]))
+ del stream['cipher']
for stream in streams: # get over speed limiting
stream['url'] += '&ratebypass=yes'
From ab4bce79f3641a651f5ad2953e05edbe17df958f Mon Sep 17 00:00:00 2001
From: chonpsk
Date: Fri, 27 Dec 2019 23:25:59 +0800
Subject: [PATCH 12/51] fix issue on itag
---
src/you_get/extractors/youtube.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index 3606a3c6..ebb42c69 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -322,7 +322,7 @@ class YouTube(VideoExtractor):
else:
stream_itag = stream['itag']
self.streams[stream_itag] = {
- 'itag': stream['itag'],
+ 'itag': str(stream['itag']),
'url': stream['url'] if 'url' in stream else None,
'sig': None,
's': None,
@@ -452,6 +452,7 @@ class YouTube(VideoExtractor):
else:
streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
for stream in streams:
+ stream['itag'] = str(stream['itag'])
if 'qualityLabel' in stream:
stream['quality_label'] = stream['qualityLabel']
del stream['qualityLabel']
From a934dea8c5d9e91b62f0a7b91a5da9a201f72982 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Sat, 28 Dec 2019 21:35:25 +0100
Subject: [PATCH 13/51] version 0.4.1388
---
src/you_get/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/version.py b/src/you_get/version.py
index 1d87177c..235b8f85 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.1355'
+__version__ = '0.4.1388'
From 767339915b44172dcfb3a394feed4af169f739fb Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 6 Jan 2020 18:25:43 +0100
Subject: [PATCH 14/51] [tests] remove one test_imgur case since it fails too
often
---
tests/test.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tests/test.py b/tests/test.py
index 16741722..220b2169 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -14,7 +14,6 @@ from you_get.extractors import (
class YouGetTests(unittest.TestCase):
def test_imgur(self):
imgur.download('http://imgur.com/WVLk5nD', info_only=True)
- imgur.download('http://imgur.com/gallery/WVLk5nD', info_only=True)
def test_magisto(self):
magisto.download(
@@ -40,7 +39,7 @@ class YouGetTests(unittest.TestCase):
)
def test_acfun(self):
- acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
+ acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
if __name__ == '__main__':
unittest.main()
From b96acaa526f61667518ad0aac233a50eed9b38f4 Mon Sep 17 00:00:00 2001
From: laiqing
Date: Wed, 8 Jan 2020 14:05:57 +0800
Subject: [PATCH 15/51] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=90=9C=E7=8B=90?=
=?UTF-8?q?=E5=8F=B7=E7=9A=84=E8=A7=86=E9=A2=91=E4=B8=8D=E8=83=BD=E4=B8=8B?=
=?UTF-8?q?=E8=BD=BD=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/you_get/extractors/sohu.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py
index a1afc126..1aedb3e6 100644
--- a/src/you_get/extractors/sohu.py
+++ b/src/you_get/extractors/sohu.py
@@ -26,7 +26,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_pr
vid = r1('id=(\d+)', url)
else:
html = get_html(url)
- vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
+ vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html)
assert vid
if extractor_proxy:
From 67c240abd043e5effd92f955d420f18e25c76dc2 Mon Sep 17 00:00:00 2001
From: shanhm
Date: Wed, 8 Jan 2020 15:20:27 +0800
Subject: [PATCH 16/51] use urllib instead of requests
---
src/you_get/extractors/ixigua.py | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py
index 151107a6..2f11e7f9 100644
--- a/src/you_get/extractors/ixigua.py
+++ b/src/you_get/extractors/ixigua.py
@@ -5,10 +5,10 @@ import binascii
from ..common import *
import random
-import requests
import string
import ctypes
from json import loads
+from urllib import request
__all__ = ['ixigua_download', 'ixigua_download_playlist_by_url']
@@ -82,8 +82,14 @@ def get_video_url_from_video_id(video_id):
def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
# example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
- sess = requests.session()
- html = sess.get(url, headers=headers).text
+ resp = urlopen_with_retry(request.Request(url))
+ html = resp.read().decode('utf-8')
+
+ _cookies = []
+ for c in resp.getheader('Set-Cookie').split("httponly,"):
+ _cookies.append(c.strip().split(' ')[0])
+ headers['cookie'] = ' '.join(_cookies)
+
conf = loads(match1(html, r"window\.config = (.+);"))
if not conf:
log.e("Get window.config from url failed, url: {}".format(url))
@@ -97,7 +103,7 @@ def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if ok != 'OK':
log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
return
- html = sess.get(url, headers=headers).text
+ html = get_content(url, headers=headers)
video_id = match1(html, r"\"vid\":\"([^\"]+)")
title = match1(html, r"\"player__videoTitle\">.*?(.*)<\/h1><\/div>")
From 5943fb6ca34371c3a87219c7c67b5eb139b34980 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Wed, 8 Jan 2020 17:36:16 +0100
Subject: [PATCH 17/51] [sohu] fix bid
---
src/you_get/extractors/sohu.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py
index 1aedb3e6..74374202 100644
--- a/src/you_get/extractors/sohu.py
+++ b/src/you_get/extractors/sohu.py
@@ -26,7 +26,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_pr
vid = r1('id=(\d+)', url)
else:
html = get_html(url)
- vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html)
+ vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
assert vid
if extractor_proxy:
From c3ae61c04e8235b444caedcd25064fa5af4f4c92 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 13 Jan 2020 22:16:33 +0100
Subject: [PATCH 18/51] [youtube] remove streams without contentLength (fix
#2767)
---
src/you_get/extractors/youtube.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index ebb42c69..07c1382e 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -222,7 +222,7 @@ class YouTube(VideoExtractor):
except:
if 'url_encoded_fmt_stream_map' not in video_info:
stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
- else:
+ else:
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
if re.search('([^"]*/base\.js)"', video_page):
self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
@@ -451,6 +451,8 @@ class YouTube(VideoExtractor):
for afmt in video_info['adaptive_fmts'][0].split(',')]
else:
streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+ # streams without contentLength got broken urls, just remove them (#2767)
+ streams = [stream for stream in streams if 'contentLength' in stream]
for stream in streams:
stream['itag'] = str(stream['itag'])
if 'qualityLabel' in stream:
From 5a008ad878fa2676ee4fa55a020f09acc7e5f66e Mon Sep 17 00:00:00 2001
From: Jarry Shaw
Date: Sat, 25 Jan 2020 12:11:41 +0800
Subject: [PATCH 19/51] Added PySocks extra requirement
---
setup.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 21246c5f..24dc9fb2 100755
--- a/setup.py
+++ b/setup.py
@@ -41,5 +41,9 @@ setup(
classifiers = proj_info['classifiers'],
- entry_points = {'console_scripts': proj_info['console_scripts']}
+ entry_points = {'console_scripts': proj_info['console_scripts']},
+
+ extras_require={
+ 'socks': ['PySocks'],
+ }
)
From ccdc58a82d17a0eebe81d4d337353b81d9b6cb68 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 27 Jan 2020 00:19:51 +0100
Subject: [PATCH 20/51] [youtube] new pattern for function name
---
src/you_get/extractors/youtube.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index 07c1382e..b8ca4280 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -94,7 +94,8 @@ class YouTube(VideoExtractor):
f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \
match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \
match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \
- match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
+ match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') or \
+ match1(js, r'=([$\w]+)\(decodeURIComponent\(')
f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
From 610c3e8942c9f7b5dad2b9342d869b4693a72ceb Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 27 Jan 2020 00:27:15 +0100
Subject: [PATCH 21/51] version 0.4.1403
---
src/you_get/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/version.py b/src/you_get/version.py
index 235b8f85..c124a979 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.1388'
+__version__ = '0.4.1403'
From 771a89fc8568007b870c34834ed00f48038a0d2d Mon Sep 17 00:00:00 2001
From: Ok
Date: Wed, 29 Jan 2020 01:25:21 +0200
Subject: [PATCH 22/51] fixed regex
---
src/you_get/extractors/coub.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/extractors/coub.py b/src/you_get/extractors/coub.py
index 36a0a5d6..a71cbc18 100644
--- a/src/you_get/extractors/coub.py
+++ b/src/you_get/extractors/coub.py
@@ -79,7 +79,7 @@ def get_title_and_urls(json_data):
def get_coub_data(html):
- coub_data = r1(r'', html)
+ coub_data = r1(r'))', html)
json_data = json.loads(coub_data)
return json_data
From 5147481a89ea752913914ddd60366b8143b2a06c Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Fri, 31 Jan 2020 15:11:58 +0100
Subject: [PATCH 23/51] [json_output] remove sort_keys in json.dumps call (fix
#2773)
---
src/you_get/json_output.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py
index 5971bd93..c6195761 100644
--- a/src/you_get/json_output.py
+++ b/src/you_get/json_output.py
@@ -29,7 +29,7 @@ def output(video_extractor, pretty_print=True):
if extra:
out["extra"] = extra
if pretty_print:
- print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
+ print(json.dumps(out, indent=4, ensure_ascii=False))
else:
print(json.dumps(out))
From cd9931e1f6b56f28dcb7202051bc9c6520b6e5ff Mon Sep 17 00:00:00 2001
From: nsb2006
Date: Mon, 10 Feb 2020 23:29:09 +0800
Subject: [PATCH 24/51] =?UTF-8?q?=E8=AE=BE=E6=83=B3=E4=BC=AA=E8=A3=85?=
=?UTF-8?q?=E8=85=BE=E8=AE=AF=E8=A7=86=E9=A2=91=E5=AE=A2=E6=88=B7=E7=AB=AF?=
=?UTF-8?q?=E4=B8=8B=E8=BD=BD1080P?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
尝试通过修改UA下载1080p,但是之前没接触过Python,依葫芦画瓢改了半天重新编译后发现UA还是默认的Python-urllib。
或者有没有UA的命令参数,试了-user-agent报错。
祝新春吉祥,百毒不侵,感谢。
---
src/you_get/extractors/qq.py | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py
index 232a08b4..6411b195 100644
--- a/src/you_get/extractors/qq.py
+++ b/src/you_get/extractors/qq.py
@@ -6,6 +6,10 @@ from .qie import download as qieDownload
from .qie_video import download_by_url as qie_video_download
from ..common import *
+headers = {
+ 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400'
+}
+
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
@@ -14,7 +18,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
platforms = [4100201, 11]
for platform in platforms:
info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&defn=shd&vid={}'.format(platform, vid)
- info = get_content(info_api)
+ info = get_content(info_api, headers)
video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
if not video_json.get('msg')=='cannot play outside':
break
@@ -41,7 +45,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
filename = '.'.join([fn_pre, magic_str, str(part), video_type])
key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename)
- part_info = get_content(key_api)
+ part_info = get_content(key_api, headers)
key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
if key_json.get('key') is None:
vkey = video_json['vl']['vi'][0]['fvkey']
@@ -71,7 +75,7 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=
BASE_URL = 'http://cgi.kg.qq.com/fcgi-bin/kg_ugc_getdetail'
params_str = '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&v=4&outCharset=utf-8&shareid=' + shareid
url = BASE_URL + params_str
- content = get_content(url)
+ content = get_content(url, headers)
json_str = content[len('jsonpcallback('):-1]
json_data = json.loads(json_str)
@@ -127,7 +131,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
return
if 'mp.weixin.qq.com/s' in url:
- content = get_content(url)
+ content = get_content(url, headers)
vids = matchall(content, [r'[?;]vid=(\w+)'])
for vid in vids:
qq_download_by_vid(vid, vid, output_dir, merge, info_only)
@@ -142,7 +146,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
title=info_json['videoinfo']['title']
elif 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url):
# http://daxue.qq.com/content/content/id/2321
- content = get_content(url)
+ content = get_content(url, headers)
vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
title = match1(content, r'title">([^"]+)
')
title = title.strip() if title else vid
@@ -152,11 +156,11 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
title = vid
elif 'view.inews.qq.com' in url:
# view.inews.qq.com/a/20180521V0Z9MH00
- content = get_content(url)
+ content = get_content(url, headers)
vid = match1(content, r'"vid":"(\w+)"')
title = match1(content, r'"title":"(\w+)"')
else:
- content = get_content(url)
+ content = get_content(url, headers)
#vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
rurl = match1(content, r'') #https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html
vid = ""
From bf49e2d1b398d4901243115746b9fd14a71aceda Mon Sep 17 00:00:00 2001
From: flewsea
Date: Thu, 27 Feb 2020 22:38:31 +0800
Subject: [PATCH 25/51] =?UTF-8?q?=E6=94=AF=E6=8C=81=E4=B8=8B=E8=BD=BD?=
=?UTF-8?q?=E9=A1=B5=E9=9D=A2=E5=86=85=E6=89=80=E6=9C=89=E8=A7=86=E9=A2=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/you_get/extractors/iwara.py | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/src/you_get/extractors/iwara.py b/src/you_get/extractors/iwara.py
index a30159d7..67a41d41 100644
--- a/src/you_get/extractors/iwara.py
+++ b/src/you_get/extractors/iwara.py
@@ -9,12 +9,15 @@ headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Cache-Control': 'max-age=0',
-
'Connection': 'keep-alive',
'Save-Data': 'on',
'Cookie':'has_js=1;show_adult=1',
}
-
+stream_types = [
+ {'id': 'Source', 'container': 'mp4', 'video_profile': '原始'},
+ {'id': '540p', 'container': 'mp4', 'video_profile': '540p'},
+ {'id': '360p', 'container': 'mp4', 'video_profile': '360P'},
+ ]
def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
global headers
video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)')
@@ -31,6 +34,17 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if not info_only:
download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)
+def download_playlist_by_url( url, **kwargs):
+ video_page = get_content(url)
+ # url_first=re.findall(r"(http[s]?://[^/]+)",url)
+ url_first=match1(url, r"(http[s]?://[^/]+)")
+ # print (url_first)
+ videos = set(re.findall(r'0):
+ for video in videos:
+ iwara_download(url_first+video, **kwargs)
+ else:
+ maybe_print('this page not found any videos')
site_info = "Iwara"
download = iwara_download
-download_playlist = playlist_not_supported('iwara')
+download_playlist = download_playlist_by_url
From 358d79778122c391d83b2eaed5c139be2f798e7f Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 2 Mar 2020 14:27:30 +0100
Subject: [PATCH 26/51] [youtube] fix download for non-DASH streams
---
src/you_get/extractors/youtube.py | 14 ++++++++++----
tests/test.py | 3 +++
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index b8ca4280..38aa1a4e 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -218,7 +218,10 @@ class YouTube(VideoExtractor):
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
# Workaround: get_video_info returns bad s. Why?
- stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
+ if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']:
+ stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats']
+ else:
+ stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
#stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
except:
if 'url_encoded_fmt_stream_map' not in video_info:
@@ -321,7 +324,7 @@ class YouTube(VideoExtractor):
'container': mime_to_container(metadata['type'][0].split(';')[0]),
}
else:
- stream_itag = stream['itag']
+ stream_itag = str(stream['itag'])
self.streams[stream_itag] = {
'itag': str(stream['itag']),
'url': stream['url'] if 'url' in stream else None,
@@ -367,7 +370,7 @@ class YouTube(VideoExtractor):
self.caption_tracks[lang] = srt
except: pass
- # Prepare DASH streams
+ # Prepare DASH streams (NOTE: not every video has DASH streams!)
try:
dashmpd = ytplayer_config['args']['dashmpd']
dash_xml = parseString(get_content(dashmpd))
@@ -451,7 +454,10 @@ class YouTube(VideoExtractor):
for i in afmt.split('&')])
for afmt in video_info['adaptive_fmts'][0].split(',')]
else:
- streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+ try:
+ streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+ except: # no DASH stream at all
+ return
# streams without contentLength got broken urls, just remove them (#2767)
streams = [stream for stream in streams if 'contentLength' in stream]
for stream in streams:
diff --git a/tests/test.py b/tests/test.py
index 220b2169..7187cfb0 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -37,6 +37,9 @@ class YouGetTests(unittest.TestCase):
'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa
info_only=True
)
+ youtube.download(
+ 'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True
+ )
def test_acfun(self):
acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
From 8b7566eeb3f82112ac9996619164503c8cd8f309 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 2 Mar 2020 14:45:57 +0100
Subject: [PATCH 27/51] version 0.4.1410
---
src/you_get/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/version.py b/src/you_get/version.py
index c124a979..c867e5b8 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.1403'
+__version__ = '0.4.1410'
From 11f78325e637cff01aad6e52d13f757052511965 Mon Sep 17 00:00:00 2001
From: Ivan Tham
Date: Sun, 8 Mar 2020 00:10:23 +0800
Subject: [PATCH 28/51] Sort return without duplicate condition
---
src/you_get/util/log.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py
index 67b26b78..81fd1bf5 100644
--- a/src/you_get/util/log.py
+++ b/src/you_get/util/log.py
@@ -99,6 +99,4 @@ def wtf(message, exit_code=1):
def yes_or_no(message):
ans = str(input('%s (y/N) ' % message)).lower().strip()
- if ans == 'y':
- return True
- return False
+ return ans == 'y'
From 7a43ac0782325d596e2372519faf033738d9a9ea Mon Sep 17 00:00:00 2001
From: helong0911
Date: Sat, 14 Mar 2020 13:33:34 +0800
Subject: [PATCH 29/51] [baomihua] fix download 403
---
src/you_get/extractors/baomihua.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/you_get/extractors/baomihua.py b/src/you_get/extractors/baomihua.py
index 99dd7132..9e97879a 100644
--- a/src/you_get/extractors/baomihua.py
+++ b/src/you_get/extractors/baomihua.py
@@ -6,6 +6,16 @@ from ..common import *
import urllib
+def baomihua_headers(referer=None, cookie=None):
+ # a reasonable UA
+ ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
+ headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+ if referer is not None:
+ headers.update({'Referer': referer})
+ if cookie is not None:
+ headers.update({'Cookie': cookie})
+ return headers
+
def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id)
host = r1(r'host=([^&]*)', html)
@@ -16,10 +26,10 @@ def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_onl
assert vid
dir_str = r1(r'&dir=([^&]*)', html).strip()
url = "http://%s/%s/%s.%s" % (host, dir_str, vid, type)
- _, ext, size = url_info(url)
+ _, ext, size = url_info(url, headers=baomihua_headers())
print_info(site_info, title, type, size)
if not info_only:
- download_urls([url], title, ext, size, output_dir, merge = merge)
+ download_urls([url], title, ext, size, output_dir, merge = merge, headers=baomihua_headers())
def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
From 8a47a729a9805032a94b7ce5171609ef3b5cb90d Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Thu, 19 Mar 2020 11:46:44 +0100
Subject: [PATCH 30/51] [tests] remove test_missevan
---
tests/test.py | 7 -------
1 file changed, 7 deletions(-)
diff --git a/tests/test.py b/tests/test.py
index 7187cfb0..b5454fbf 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -21,13 +21,6 @@ class YouGetTests(unittest.TestCase):
info_only=True
)
- def test_missevan(self):
- missevan.download('https://m.missevan.com/sound/1285995', info_only=True)
- missevan.download_playlist(
- 'https://www.missevan.com/mdrama/drama/24130', info_only=True)
- missevan.download_playlist(
- 'https://www.missevan.com/albuminfo/203090', info_only=True)
-
def test_youtube(self):
youtube.download(
'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
From a8d5819417ce0d4c7d7c4789043b2fa7e065b721 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Thu, 19 Mar 2020 11:57:36 +0100
Subject: [PATCH 31/51] update .travis.yml
---
.travis.yml | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 8433fe75..8dd26bfa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,15 +4,10 @@ python:
- "3.4"
- "3.5"
- "3.6"
+ - "3.7"
+ - "3.8"
+ - "nightly"
- "pypy3"
-matrix:
- include:
- - python: "3.7"
- dist: xenial
- - python: "3.8-dev"
- dist: xenial
- - python: "nightly"
- dist: xenial
before_install:
- pip install flake8
before_script:
From 50318b1e4d94da2e7034080cfa428feff3904df6 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Thu, 19 Mar 2020 12:04:32 +0100
Subject: [PATCH 32/51] update .travis.yml (remove nightly)
---
.travis.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.travis.yml b/.travis.yml
index 8dd26bfa..eedbeeb2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,7 @@ python:
- "3.6"
- "3.7"
- "3.8"
- - "nightly"
+ #- "nightly" (flake8 not working in python 3.9 yet, module 'ast' has no attribute 'AugLoad')
- "pypy3"
before_install:
- pip install flake8
From cfa93fb16c2f0460caf62f6fce6fada683dad564 Mon Sep 17 00:00:00 2001
From: Yiyin Gu
Date: Fri, 20 Mar 2020 19:16:41 -0400
Subject: [PATCH 33/51] temp fix of netease download
Some VIP downloads work through this URL, others don't. Still looking into it.
---
src/you_get/extractors/netease.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py
index f74747b1..c7c0f666 100644
--- a/src/you_get/extractors/netease.py
+++ b/src/you_get/extractors/netease.py
@@ -107,6 +107,9 @@ def netease_video_download(vinfo, output_dir='.', info_only=False):
def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""):
title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
+ url_best = "http://music.163.com/song/media/outer/url?id=" + \
+ str(song['id']) + ".mp3"
+ '''
songNet = 'p' + song['mp3Url'].split('/')[2][1:]
if 'hMusic' in song and song['hMusic'] != None:
@@ -115,7 +118,7 @@ def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix
url_best = song['mp3Url']
elif 'bMusic' in song:
url_best = make_url(songNet, song['bMusic']['dfsId'])
-
+ '''
netease_download_common(title, url_best,
output_dir=output_dir, info_only=info_only)
From b4ea5976fb51233e0289196eb39fa73a6f3e1829 Mon Sep 17 00:00:00 2001
From: brainbush <960821@gmail.com>
Date: Mon, 23 Mar 2020 15:52:15 +0800
Subject: [PATCH 34/51] add support for BVID of bilibili
---
src/you_get/extractors/bilibili.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 444ccb6f..29ba0aff 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -159,7 +159,7 @@ class Bilibili(VideoExtractor):
sort = 'live'
elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url):
sort = 'vc'
- elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
+ elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url):
sort = 'video'
else:
self.download_playlist_by_url(self.url, **kwargs)
From 25c481cdcddf40b784c4b24fd8840d1574854845 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 23 Mar 2020 11:55:48 +0100
Subject: [PATCH 35/51] purge dead sites
---
README.md | 5 ---
src/you_get/extractors/musicplayon.py | 38 -----------------------
src/you_get/extractors/videomega.py | 44 ---------------------------
src/you_get/extractors/vidto.py | 40 ------------------------
4 files changed, 127 deletions(-)
delete mode 100644 src/you_get/extractors/musicplayon.py
delete mode 100644 src/you_get/extractors/videomega.py
delete mode 100644 src/you_get/extractors/vidto.py
diff --git a/README.md b/README.md
index 0735bd8a..3105766b 100644
--- a/README.md
+++ b/README.md
@@ -368,15 +368,12 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| VK | |✓|✓| |
| Vine | |✓| | |
| Vimeo | |✓| | |
-| Vidto | |✓| | |
-| Videomega | |✓| | |
| Veoh | |✓| | |
| **Tumblr** | |✓|✓|✓|
| TED | |✓| | |
| SoundCloud | | | |✓|
| SHOWROOM | |✓| | |
| Pinterest | | |✓| |
-| MusicPlayOn | |✓| | |
| MTV81 | |✓| | |
| Mixcloud | | | |✓|
| Metacafe | |✓| | |
@@ -387,7 +384,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| InfoQ | |✓| | |
| Imgur | | |✓| |
| Heavy Music Archive | | | |✓|
-| **Google+** | |✓|✓| |
| Freesound | | | |✓|
| Flickr | |✓|✓| |
| FC2 Video | |✓| | |
@@ -409,7 +405,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| **bilibili
哔哩哔哩** | |✓| | |
| 豆瓣 | |✓| |✓|
| 斗鱼 | |✓| | |
-| Panda
熊猫 | |✓| | |
| 凤凰视频 | |✓| | |
| 风行网 | |✓| | |
| iQIYI
爱奇艺 | |✓| | |
diff --git a/src/you_get/extractors/musicplayon.py b/src/you_get/extractors/musicplayon.py
deleted file mode 100644
index ffc4ec36..00000000
--- a/src/you_get/extractors/musicplayon.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-
-from ..common import *
-from ..extractor import VideoExtractor
-
-import json
-
-class MusicPlayOn(VideoExtractor):
- name = "MusicPlayOn"
-
- stream_types = [
- {'id': '720p HD'},
- {'id': '360p SD'},
- ]
-
- def prepare(self, **kwargs):
- content = get_content(self.url)
-
- self.title = match1(content,
- r'setup\[\'title\'\] = "([^"]+)";')
-
- for s in self.stream_types:
- quality = s['id']
- src = match1(content,
- r'src: "([^"]+)", "data-res": "%s"' % quality)
- if src is not None:
- url = 'http://en.musicplayon.com%s' % src
- self.streams[quality] = {'url': url}
-
- def extract(self, **kwargs):
- for i in self.streams:
- s = self.streams[i]
- _, s['container'], s['size'] = url_info(s['url'])
- s['src'] = [s['url']]
-
-site = MusicPlayOn()
-download = site.download_by_url
-# TBD: implement download_playlist
diff --git a/src/you_get/extractors/videomega.py b/src/you_get/extractors/videomega.py
deleted file mode 100644
index 34fb5205..00000000
--- a/src/you_get/extractors/videomega.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['videomega_download']
-
-from ..common import *
-import ssl
-
-def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
- # Hot-plug cookie handler
- ssl_context = request.HTTPSHandler(
- context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
- cookie_handler = request.HTTPCookieProcessor()
- opener = request.build_opener(ssl_context, cookie_handler)
- opener.addheaders = [('Referer', url),
- ('Cookie', 'noadvtday=0')]
- request.install_opener(opener)
-
- if re.search(r'view\.php', url):
- php_url = url
- else:
- content = get_content(url)
- m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
- ref = m.group(1)
- width, height = m.group(2), m.group(3)
- php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height)
- content = get_content(php_url)
-
- title = match1(content, r'(.*)')
- js = match1(content, r'(eval.*)')
- t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
- t = re.sub(r'(\w)', r'{\1}', t)
- t = t.translate({87 + i: str(i) for i in range(10, 36)})
- s = match1(js, r"'([^']+)'\.split").split('|')
- src = t.format(*s)
-
- type, ext, size = url_info(src, faker=True)
-
- print_info(site_info, title, type, size)
- if not info_only:
- download_urls([src], title, ext, size, output_dir, merge=merge, faker=True)
-
-site_info = "Videomega.tv"
-download = videomega_download
-download_playlist = playlist_not_supported('videomega')
diff --git a/src/you_get/extractors/vidto.py b/src/you_get/extractors/vidto.py
deleted file mode 100644
index c4e3b87e..00000000
--- a/src/you_get/extractors/vidto.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['vidto_download']
-
-from ..common import *
-import pdb
-import time
-
-
-def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
- html = get_content(url)
- params = {}
- r = re.findall(
- r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html)
- for name, value in r:
- params[name] = value
- data = parse.urlencode(params).encode('utf-8')
- req = request.Request(url)
- print("Please wait for 6 seconds...")
- time.sleep(6)
- print("Starting")
- new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
- new_stff = re.search('lnk_download" href="(.*?)">', new_html)
- if(new_stff):
- url = new_stff.group(1)
- title = params['fname']
- type = ""
- ext = ""
- a, b, size = url_info(url)
- print_info(site_info, title, type, size)
- if not info_only:
- download_urls([url], title, ext, size, output_dir, merge=merge)
- else:
- print("cannot find link, please review")
- pdb.set_trace()
-
-
-site_info = "vidto.me"
-download = vidto_download
-download_playlist = playlist_not_supported('vidto')
From e1edd9f912c147a4f04d780a39169b14a589208c Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 23 Mar 2020 12:03:41 +0100
Subject: [PATCH 36/51] purge dead sites
---
src/you_get/common.py | 3 ---
src/you_get/extractors/__init__.py | 2 --
2 files changed, 5 deletions(-)
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 70602c89..8c609d8c 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -83,7 +83,6 @@ SITES = {
'missevan' : 'missevan',
'mixcloud' : 'mixcloud',
'mtv81' : 'mtv81',
- 'musicplayon' : 'musicplayon',
'miaopai' : 'yixia',
'naver' : 'naver',
'7gogo' : 'nanagogo',
@@ -107,8 +106,6 @@ SITES = {
'twimg' : 'twitter',
'twitter' : 'twitter',
'ucas' : 'ucas',
- 'videomega' : 'videomega',
- 'vidto' : 'vidto',
'vimeo' : 'vimeo',
'wanmen' : 'wanmen',
'weibo' : 'miaopai',
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 5ed5264b..ce95904c 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -49,7 +49,6 @@ from .miaopai import *
from .miomio import *
from .mixcloud import *
from .mtv81 import *
-from .musicplayon import *
from .nanagogo import *
from .naver import *
from .netease import *
@@ -74,7 +73,6 @@ from .tumblr import *
from .twitter import *
from .ucas import *
from .veoh import *
-from .videomega import *
from .vimeo import *
from .vine import *
from .vk import *
From 88d574a4e9144f5a122460a2ea24c849fd04b88b Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 23 Mar 2020 12:13:33 +0100
Subject: [PATCH 37/51] version 0.4.1423
---
src/you_get/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/version.py b/src/you_get/version.py
index c867e5b8..ab19c2be 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.1410'
+__version__ = '0.4.1423'
From 25422ea3c5520bafc35614865637968634f93086 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 23 Mar 2020 12:23:40 +0100
Subject: [PATCH 38/51] update supported Python versions
---
you-get.json | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/you-get.json b/you-get.json
index 56f8212a..e98e2e8a 100644
--- a/you-get.json
+++ b/you-get.json
@@ -18,14 +18,13 @@
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.0",
- "Programming Language :: Python :: 3.1",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
"Topic :: Internet",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Multimedia",
From f10a1cdade1398758aafa8137118d21e7b77179e Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 23 Mar 2020 12:28:02 +0100
Subject: [PATCH 39/51] update LICENSE.txt
---
LICENSE.txt | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/LICENSE.txt b/LICENSE.txt
index 5964bf20..a193d8e2 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,6 +1,7 @@
MIT License
-Copyright (c) 2012-2019 Mort Yao
+Copyright (c) 2012-2020 Mort Yao and other contributors
+ (https://github.com/soimort/you-get/graphs/contributors)
Copyright (c) 2012 Boyu Guo
Permission is hereby granted, free of charge, to any person obtaining a copy
From d6afc2e829f152d3b6d88944d1ad1ce7fe30776b Mon Sep 17 00:00:00 2001
From: icpz
Date: Tue, 24 Mar 2020 18:48:22 +0800
Subject: [PATCH 40/51] add support for BVID in playlist mode of bilibili
---
src/you_get/extractors/bilibili.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 29ba0aff..045853f3 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -552,7 +552,7 @@ class Bilibili(VideoExtractor):
elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \
re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url):
sort = 'bangumi_md'
- elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
+ elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|BV(\S+))', self.url):
sort = 'video'
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url):
sort = 'space_channel'
From f12943aa00a5c59fe183eb9b59f6f1928d26a230 Mon Sep 17 00:00:00 2001
From: e <1160590998@qq.com>
Date: Sat, 28 Mar 2020 18:15:22 +0800
Subject: [PATCH 41/51] add support for BVID in watchlater mode.
---
src/you_get/extractors/bilibili.py | 6 +++---
tests/test.py | 10 +++++++++-
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 045853f3..c669415d 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -132,10 +132,10 @@ class Bilibili(VideoExtractor):
# r' bangumi/play/ep
diff --git a/tests/test.py b/tests/test.py
index b5454fbf..6fd3db6c 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -7,7 +7,8 @@ from you_get.extractors import (
magisto,
youtube,
missevan,
- acfun
+ acfun,
+ bilibili
)
@@ -37,5 +38,12 @@ class YouGetTests(unittest.TestCase):
def test_acfun(self):
acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
+ def test_bilibil(self):
+ bilibili.download(
+ "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True
+ )
+ bilibili.download(
+ "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
+ )
if __name__ == '__main__':
unittest.main()
From c7b7a996ffa348833787dd77da70be288c65a9a5 Mon Sep 17 00:00:00 2001
From: zhufengning
Date: Sun, 29 Mar 2020 18:09:29 +0800
Subject: [PATCH 42/51] fix bilibili favlist download
and update the API URL
---
src/you_get/extractors/bilibili.py | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index c669415d..2152661f 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -103,8 +103,8 @@ class Bilibili(VideoExtractor):
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
@staticmethod
- def bilibili_space_favlist_api(vmid, fid, pn=1, ps=100):
- return 'https://api.bilibili.com/x/space/fav/arc?vmid=%s&fid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (vmid, fid, pn, ps)
+ def bilibili_space_favlist_api(fid, pn=1, ps=20):
+ return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
@staticmethod
def bilibili_space_video_api(mid, pn=1, ps=100):
@@ -679,20 +679,22 @@ class Bilibili(VideoExtractor):
elif sort == 'space_favlist':
m = re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url)
vmid, fid = m.group(1), m.group(2)
- api_url = self.bilibili_space_favlist_api(vmid, fid)
+ api_url = self.bilibili_space_favlist_api(fid)
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
favlist_info = json.loads(api_content)
- pc = favlist_info['data']['pagecount']
-
- for pn in range(1, pc + 1):
- api_url = self.bilibili_space_favlist_api(vmid, fid, pn=pn)
+ pc = favlist_info['data']['info']['media_count'] // len(favlist_info['data']['medias'])
+ if favlist_info['data']['info']['media_count'] % len(favlist_info['data']['medias']) != 0:
+ pc += 1
+ for pn in range(1, pc):
+ log.w('Extracting %s of %s pages ...' % (pn, pc))
+ api_url = self.bilibili_space_favlist_api(fid, pn=pn)
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
favlist_info = json.loads(api_content)
- epn, i = len(favlist_info['data']['archives']), 0
- for video in favlist_info['data']['archives']:
+ epn, i = len(favlist_info['data']['medias']), 0
+ for video in favlist_info['data']['medias']:
i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
- url = 'https://www.bilibili.com/video/av%s' % video['aid']
+ url = 'https://www.bilibili.com/video/av%s' % video['id']
self.__class__().download_playlist_by_url(url, **kwargs)
elif sort == 'space_video':
From d603266a421f6ee00f24a16cb29063403cee389a Mon Sep 17 00:00:00 2001
From: zhufengning
Date: Sun, 29 Mar 2020 19:31:26 +0800
Subject: [PATCH 43/51] fix wrong range usage
---
src/you_get/extractors/bilibili.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 2152661f..95ce707a 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -685,7 +685,7 @@ class Bilibili(VideoExtractor):
pc = favlist_info['data']['info']['media_count'] // len(favlist_info['data']['medias'])
if favlist_info['data']['info']['media_count'] % len(favlist_info['data']['medias']) != 0:
pc += 1
- for pn in range(1, pc):
+ for pn in range(1, pc + 1):
log.w('Extracting %s of %s pages ...' % (pn, pc))
api_url = self.bilibili_space_favlist_api(fid, pn=pn)
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
From 5c339cc68893fa67cdf2d09163e9c4ad1e85d060 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Sun, 29 Mar 2020 22:49:46 +0200
Subject: [PATCH 44/51] [baidu] support https
---
src/you_get/extractors/baidu.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py
index 77e666b3..521d5e99 100644
--- a/src/you_get/extractors/baidu.py
+++ b/src/you_get/extractors/baidu.py
@@ -112,15 +112,15 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
time.sleep(5)
download_urls([real_url], title, ext, size,
output_dir, url, merge=merge, faker=True)
- elif re.match(r'http://music.baidu.com/album/\d+', url):
- id = r1(r'http://music.baidu.com/album/(\d+)', url)
+ elif re.match(r'https?://music.baidu.com/album/\d+', url):
+ id = r1(r'https?://music.baidu.com/album/(\d+)', url)
baidu_download_album(id, output_dir, merge, info_only)
- elif re.match('http://music.baidu.com/song/\d+', url):
- id = r1(r'http://music.baidu.com/song/(\d+)', url)
+ elif re.match('https?://music.baidu.com/song/\d+', url):
+ id = r1(r'https?://music.baidu.com/song/(\d+)', url)
baidu_download_song(id, output_dir, merge, info_only)
- elif re.match('http://tieba.baidu.com/', url):
+ elif re.match('https?://tieba.baidu.com/', url):
try:
# embedded videos
embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
From b347b1bb06c2f2aee71ddb1d770d7c1294919cee Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 30 Mar 2020 02:40:05 +0200
Subject: [PATCH 45/51] [bilibili] support h
---
src/you_get/extractors/bilibili.py | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 95ce707a..f53af468 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -28,6 +28,8 @@ class Bilibili(VideoExtractor):
'container': 'FLV', 'video_resolution': '360p', 'desc': '流畅 360P'},
# 'quality': 15?
{'id': 'mp4', 'quality': 0},
+
+ {'id': 'jpg', 'quality': 0},
]
@staticmethod
@@ -114,6 +116,10 @@ class Bilibili(VideoExtractor):
def bilibili_vc_api(video_id):
return 'https://api.vc.bilibili.com/clip/v1/video/detail?video_id=%s' % video_id
+ @staticmethod
+ def bilibili_h_api(doc_id):
+ return 'https://api.vc.bilibili.com/link_draw/v1/doc/detail?doc_id=%s' % doc_id
+
@staticmethod
def url_size(url, faker=False, headers={},err_value=0):
try:
@@ -161,6 +167,8 @@ class Bilibili(VideoExtractor):
sort = 'vc'
elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url):
sort = 'video'
+ elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url):
+ sort = 'h'
else:
self.download_playlist_by_url(self.url, **kwargs)
return
@@ -426,6 +434,24 @@ class Bilibili(VideoExtractor):
self.streams['mp4'] = {'container': container,
'size': size, 'src': [playurl]}
+ # h images
+ elif sort == 'h':
+ m = re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url)
+ doc_id = m.group(1)
+ api_url = self.bilibili_h_api(doc_id)
+ api_content = get_content(api_url, headers=self.bilibili_headers())
+ h_info = json.loads(api_content)
+
+ urls = []
+ for pic in h_info['data']['item']['pictures']:
+ img_src = pic['img_src']
+ urls.append(img_src)
+ size = urls_size(urls)
+
+ self.title = doc_id
+ container = 'jpg' # enforce JPG container
+ self.streams[container] = {'container': container,
+ 'size': size, 'src': urls}
def prepare_by_cid(self,avid,cid,title,html_content,playinfo,playinfo_,url):
#response for interaction video
From 9858e2f25daca32f9205d5be9e3371e387976e2d Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 30 Mar 2020 02:43:59 +0200
Subject: [PATCH 46/51] version 0.4.1432
---
src/you_get/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/you_get/version.py b/src/you_get/version.py
index ab19c2be..d5004187 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.1423'
+__version__ = '0.4.1432'
From bd06317fcc947d2705adaae4ee5e2a21acececc4 Mon Sep 17 00:00:00 2001
From: Mort Yao
Date: Mon, 30 Mar 2020 18:16:58 +0200
Subject: [PATCH 47/51] [README] [bilibili] images and audios supported
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 3105766b..8ec210b7 100644
--- a/README.md
+++ b/README.md
@@ -402,7 +402,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| **AcFun** | |✓| | |
| **Baidu
百度贴吧** | |✓|✓| |
| 爆米花网 | |✓| | |
-| **bilibili
哔哩哔哩** | |✓| | |
+| **bilibili
哔哩哔哩** | |✓|✓|✓|
| 豆瓣 | |✓| |✓|
| 斗鱼 | |✓| | |
| 凤凰视频 | |✓| | |
From 84a5611939443dbf03e9751a0b33598934647652 Mon Sep 17 00:00:00 2001
From: richard
Date: Tue, 7 Apr 2020 23:45:32 -0400
Subject: [PATCH 48/51] initial
---
README.md | 1 +
src/you_get/common.py | 1 +
src/you_get/extractors/__init__.py | 3 +-
src/you_get/extractors/xinpianchang.py | 46 ++++++++++++++++++++++++++
tests/test.py | 7 +++-
5 files changed, 56 insertions(+), 2 deletions(-)
create mode 100644 src/you_get/extractors/xinpianchang.py
diff --git a/README.md b/README.md
index 8ec210b7..3429f9d8 100644
--- a/README.md
+++ b/README.md
@@ -436,6 +436,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 火猫TV | |✓| | |
| 阳光宽频网 | |✓| | |
| 西瓜视频 | |✓| | |
+| 新片场 | |✓| | |
| 快手 | |✓|✓| |
| 抖音 | |✓| | |
| TikTok | |✓| | |
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 8c609d8c..2e4edef5 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -116,6 +116,7 @@ SITES = {
'xiaokaxiu' : 'yixia',
'xiaojiadianvideo' : 'fc2video',
'ximalaya' : 'ximalaya',
+ 'xinpianchang' : 'xinpianchang',
'yinyuetai' : 'yinyuetai',
'yizhibo' : 'yizhibo',
'youku' : 'youku',
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index ce95904c..4280d236 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -79,10 +79,11 @@ from .vk import *
from .w56 import *
from .wanmen import *
from .xiami import *
+from .xinpianchang import *
from .yinyuetai import *
from .yixia import *
from .youku import *
from .youtube import *
from .zhanqi import *
from .zhibo import *
-from .zhihu import *
+from .zhihu import *
\ No newline at end of file
diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py
new file mode 100644
index 00000000..a15b193a
--- /dev/null
+++ b/src/you_get/extractors/xinpianchang.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import re
+import json
+from ..extractor import VideoExtractor
+from ..common import get_content, playlist_not_supported
+
+
+class Xinpianchang(VideoExtractor):
+ stream_types = [
+ {'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'},
+ {'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'},
+ {'id': '1080', 'quality': '高清 1080P', 'video_profile': 'mp4-FHD'},
+ {'id': '720', 'quality': '高清 720P', 'video_profile': 'mp4-HD'},
+ {'id': '540', 'quality': '清晰 540P', 'video_profile': 'mp4-SD'},
+ {'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'}
+ ]
+
+ name = 'xinpianchang'
+
+ def prepare(self, **kwargs):
+ # find key
+ page_content = get_content(self.url)
+ match_rule = r"vid: \"(.+?)\","
+ key = re.findall(match_rule, page_content)[0]
+
+ # get videos info
+ video_url = 'https://openapi-vtom.vmovier.com/v3/video/' + key + '?expand=resource'
+ data = json.loads(get_content(video_url))
+ self.title = data["data"]["video"]["title"]
+ video_info = data["data"]["resource"]["progressive"]
+
+ # set streams dict
+ for video in video_info:
+ url = video["https_url"]
+ size = video["filesize"]
+ profile = video["profile_code"]
+ stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0]
+
+ stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality'])
+ print(stream_data)
+ self.streams[stype['id']] = stream_data
+
+
+download = Xinpianchang().download_by_url
+download_playlist = playlist_not_supported('xinpianchang')
diff --git a/tests/test.py b/tests/test.py
index 6fd3db6c..5bc0a2e5 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -8,7 +8,8 @@ from you_get.extractors import (
youtube,
missevan,
acfun,
- bilibili
+ bilibili,
+ xinpianchang
)
@@ -45,5 +46,9 @@ class YouGetTests(unittest.TestCase):
bilibili.download(
"https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
)
+
+ def test_xinpianchang(self):
+ imgur.download('https://www.xinpianchang.com/a10673220', info_only=True)
+
if __name__ == '__main__':
unittest.main()
From b771248d23a73c7dc18e2b1ea5bd13247342e456 Mon Sep 17 00:00:00 2001
From: richard
Date: Tue, 7 Apr 2020 23:54:43 -0400
Subject: [PATCH 49/51] fix
---
src/you_get/extractors/xinpianchang.py | 1 -
tests/test.py | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py
index a15b193a..48830d97 100644
--- a/src/you_get/extractors/xinpianchang.py
+++ b/src/you_get/extractors/xinpianchang.py
@@ -38,7 +38,6 @@ class Xinpianchang(VideoExtractor):
stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0]
stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality'])
- print(stream_data)
self.streams[stype['id']] = stream_data
diff --git a/tests/test.py b/tests/test.py
index 5bc0a2e5..5e4de738 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -48,7 +48,7 @@ class YouGetTests(unittest.TestCase):
)
def test_xinpianchang(self):
- imgur.download('https://www.xinpianchang.com/a10673220', info_only=True)
+ xinpianchang.download('https://www.xinpianchang.com/a10673220', info_only=True)
if __name__ == '__main__':
unittest.main()
From 4e0ca6f3e4a02d851a51e56ebcff472891ad6a56 Mon Sep 17 00:00:00 2001
From: richard
Date: Tue, 7 Apr 2020 23:55:50 -0400
Subject: [PATCH 50/51] rm test
---
tests/test.py | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/tests/test.py b/tests/test.py
index 5e4de738..6fd3db6c 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -8,8 +8,7 @@ from you_get.extractors import (
youtube,
missevan,
acfun,
- bilibili,
- xinpianchang
+ bilibili
)
@@ -46,9 +45,5 @@ class YouGetTests(unittest.TestCase):
bilibili.download(
"https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
)
-
- def test_xinpianchang(self):
- xinpianchang.download('https://www.xinpianchang.com/a10673220', info_only=True)
-
if __name__ == '__main__':
unittest.main()
From 018cfde6048707a8a642493a3dc0e934de2f267e Mon Sep 17 00:00:00 2001
From: Richard Xue
Date: Wed, 8 Apr 2020 00:08:44 -0400
Subject: [PATCH 51/51] Update xinpianchang.py
---
src/you_get/extractors/xinpianchang.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py
index 48830d97..fac3d01f 100644
--- a/src/you_get/extractors/xinpianchang.py
+++ b/src/you_get/extractors/xinpianchang.py
@@ -7,6 +7,7 @@ from ..common import get_content, playlist_not_supported
class Xinpianchang(VideoExtractor):
+ name = 'xinpianchang'
stream_types = [
{'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'},
{'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'},
@@ -16,8 +17,6 @@ class Xinpianchang(VideoExtractor):
{'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'}
]
- name = 'xinpianchang'
-
def prepare(self, **kwargs):
# find key
page_content = get_content(self.url)