diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index b3d50ff7..daae6668 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.5, 3.6, 3.7, 3.8, pypy3]
+ python-version: [3.5, 3.6, 3.7, 3.8, 3.9, pypy3]
steps:
- uses: actions/checkout@v2
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 1d352c53..6caf81cb 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -434,8 +434,17 @@ def get_content(url, headers={}, decoded=True):
req = request.Request(url, headers=headers)
if cookies:
- cookies.add_cookie_header(req)
- req.headers.update(req.unredirected_hdrs)
+ # NOTE: Do not use cookies.add_cookie_header(req)
+ # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10
+ # See also:
+ # - https://github.com/python/cpython/pull/17471
+ # - https://bugs.python.org/issue2190
+ # Here we add cookies to the request headers manually
+ cookie_strings = []
+ for cookie in list(cookies):
+ cookie_strings.append(cookie.name + '=' + cookie.value)
+ cookie_headers = {'Cookie': '; '.join(cookie_strings)}
+ req.headers.update(cookie_headers)
response = urlopen_with_retry(req)
data = response.read()
@@ -478,8 +487,17 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
req = request.Request(url, headers=headers)
if cookies:
- cookies.add_cookie_header(req)
- req.headers.update(req.unredirected_hdrs)
+ # NOTE: Do not use cookies.add_cookie_header(req)
+ # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10
+ # See also:
+ # - https://github.com/python/cpython/pull/17471
+ # - https://bugs.python.org/issue2190
+ # Here we add cookies to the request headers manually
+ cookie_strings = []
+ for cookie in list(cookies):
+ cookie_strings.append(cookie.name + '=' + cookie.value)
+ cookie_headers = {'Cookie': '; '.join(cookie_strings)}
+ req.headers.update(cookie_headers)
if kwargs.get('post_data_raw'):
post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
else:
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index a812d72d..edb656c7 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -12,6 +12,8 @@ class Bilibili(VideoExtractor):
# Bilibili media encoding options, in descending quality order.
stream_types = [
+ {'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280,
+ 'container': 'FLV', 'video_resolution': '3840p', 'desc': '真彩 HDR'},
{'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
{'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
@@ -160,6 +162,11 @@ class Bilibili(VideoExtractor):
self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
+ # redirect: s
+ elif re.match(r'https?://(www\.)?bilibili\.com/s/(.+)', self.url):
+ self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)')
+ html_content = get_content(self.url, headers=self.bilibili_headers())
+
# sort it out
if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
sort = 'audio'
@@ -179,7 +186,7 @@ class Bilibili(VideoExtractor):
self.download_playlist_by_url(self.url, **kwargs)
return
- # regular av video
+ # regular video
if sort == 'video':
initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
initial_state = json.loads(initial_state_text)
@@ -599,13 +606,21 @@ class Bilibili(VideoExtractor):
log.e('[Error] Unsupported URL pattern.')
exit(1)
- # regular av video
+ # regular video
if sort == 'video':
initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
initial_state = json.loads(initial_state_text)
aid = initial_state['videoData']['aid']
pn = initial_state['videoData']['videos']
- if pn!= len(initial_state['videoData']['pages']):#interaction video 互动视频
+
+ if pn == len(initial_state['videoData']['pages']):
+                # non-interactive video
+ for pi in range(1, pn + 1):
+ purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
+ self.__class__().download_by_url(purl, **kwargs)
+
+ else:
+                # interactive video
search_node_list = []
download_cid_set = set([initial_state['videoData']['cid']])
params = {
@@ -656,24 +671,6 @@ class Bilibili(VideoExtractor):
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
self.download(**kwargs)
- else:
- playinfo_text = match1(html_content, r'__playinfo__=(.*?)', html)
+ data = re.search(r'window\._sharedData\s*=\s*(.*);', cont)
try:
info = json.loads(data.group(1))
post = info['entry_data']['PostPage'][0]
assert post
except:
# with logged-in cookies
- data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);', html)
+ data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);', cont)
if data is not None:
log.e('[Warning] Cookies needed.')
post = json.loads(data.group(1))
diff --git a/src/you_get/extractors/iwara.py b/src/you_get/extractors/iwara.py
index 67a41d41..37cd712a 100644
--- a/src/you_get/extractors/iwara.py
+++ b/src/you_get/extractors/iwara.py
@@ -27,6 +27,9 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
api_url = video_url + '/api/video/' + video_hash
content = get_content(api_url, headers=headers)
data = json.loads(content)
+ if len(data)<1 :
+ print('Maybe is Private Video?'+'['+title+']')
+ return True;
down_urls = 'https:' + data[0]['uri']
type, ext, size = url_info(down_urls, headers=headers)
print_info(site_info, title+data[0]['resolution'], type, size)
@@ -35,10 +38,8 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)
def download_playlist_by_url( url, **kwargs):
- video_page = get_content(url)
- # url_first=re.findall(r"(http[s]?://[^/]+)",url)
+ video_page = get_html(url)
url_first=match1(url, r"(http[s]?://[^/]+)")
- # print (url_first)
videos = set(re.findall(r'0):
for video in videos:
diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py
index 0ddcadba..05c1e650 100644
--- a/src/you_get/extractors/miaopai.py
+++ b/src/you_get/extractors/miaopai.py
@@ -19,7 +19,7 @@ fake_headers_mobile = {
def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
'''Source: Android mobile'''
- page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
+ page_url = 'https://video.weibo.com/show?fid=' + fid + '&type=mp4'
mobile_page = get_content(page_url, headers=fake_headers_mobile)
url = match1(mobile_page, r'