diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index b3d50ff7..daae6668 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.5, 3.6, 3.7, 3.8, pypy3]
+        python-version: [3.5, 3.6, 3.7, 3.8, 3.9, pypy3]

     steps:
     - uses: actions/checkout@v2
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 1d352c53..6caf81cb 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -434,8 +434,17 @@ def get_content(url, headers={}, decoded=True):

     req = request.Request(url, headers=headers)
     if cookies:
-        cookies.add_cookie_header(req)
-        req.headers.update(req.unredirected_hdrs)
+        # NOTE: Do not use cookies.add_cookie_header(req)
+        # #HttpOnly_ cookies were not properly supported by CookieJar and MozillaCookieJar until Python 3.10
+        # See also:
+        # - https://github.com/python/cpython/pull/17471
+        # - https://bugs.python.org/issue2190
+        # Here we add cookies to the request headers manually
+        cookie_strings = []
+        for cookie in list(cookies):
+            cookie_strings.append(cookie.name + '=' + cookie.value)
+        cookie_headers = {'Cookie': '; '.join(cookie_strings)}
+        req.headers.update(cookie_headers)

     response = urlopen_with_retry(req)
     data = response.read()
@@ -478,8 +487,17 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):

     req = request.Request(url, headers=headers)
     if cookies:
-        cookies.add_cookie_header(req)
-        req.headers.update(req.unredirected_hdrs)
+        # NOTE: Do not use cookies.add_cookie_header(req)
+        # #HttpOnly_ cookies were not properly supported by CookieJar and MozillaCookieJar until Python 3.10
+        # See also:
+        # - https://github.com/python/cpython/pull/17471
+        # - https://bugs.python.org/issue2190
+        # Here we add cookies to the request headers manually
+        cookie_strings = []
+        for cookie in list(cookies):
+            cookie_strings.append(cookie.name + '=' + cookie.value)
+        cookie_headers = {'Cookie': '; '.join(cookie_strings)}
+        req.headers.update(cookie_headers)
     if kwargs.get('post_data_raw'):
         post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
     else:
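For reference, the manual Cookie header built above can be exercised standalone with nothing but the standard library. A minimal sketch, assuming a Netscape-format cookies.txt (the path and URL are placeholders):

    from http.cookiejar import MozillaCookieJar
    from urllib import request

    # Send every cookie in the jar unconditionally, mirroring the diff,
    # instead of letting add_cookie_header() apply its policy checks.
    cookies = MozillaCookieJar('cookies.txt')  # placeholder path
    cookies.load(ignore_discard=True, ignore_expires=True)

    req = request.Request('https://example.com/')  # placeholder URL
    cookie_strings = [c.name + '=' + c.value for c in cookies]
    req.headers.update({'Cookie': '; '.join(cookie_strings)})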
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index a812d72d..edb656c7 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -12,6 +12,8 @@ class Bilibili(VideoExtractor):

     # Bilibili media encoding options, in descending quality order.
     stream_types = [
+        {'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '3840p', 'desc': '真彩 HDR'},
         {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
          'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
         {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
@@ -160,6 +162,11 @@ class Bilibili(VideoExtractor):
             self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
             html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))

+        # redirect: /s/ short links
+        elif re.match(r'https?://(www\.)?bilibili\.com/s/(.+)', self.url):
+            self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)')
+            html_content = get_content(self.url, headers=self.bilibili_headers())
+
         # sort it out
         if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
             sort = 'audio'
@@ -179,7 +186,7 @@ class Bilibili(VideoExtractor):
             self.download_playlist_by_url(self.url, **kwargs)
             return

-        # regular av video
+        # regular video
         if sort == 'video':
             initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)')  # FIXME
             initial_state = json.loads(initial_state_text)
@@ -599,13 +606,21 @@ class Bilibili(VideoExtractor):
             log.e('[Error] Unsupported URL pattern.')
             exit(1)

-        # regular av video
+        # regular video
         if sort == 'video':
             initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)')  # FIXME
             initial_state = json.loads(initial_state_text)
             aid = initial_state['videoData']['aid']
             pn = initial_state['videoData']['videos']
-            if pn!= len(initial_state['videoData']['pages']):#interaction video 互动视频
+
+            if pn == len(initial_state['videoData']['pages']):
+                # non-interactive video
+                for pi in range(1, pn + 1):
+                    purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
+                    self.__class__().download_by_url(purl, **kwargs)
+
+            else:
+                # interactive video
                 search_node_list = []
                 download_cid_set = set([initial_state['videoData']['cid']])
                 params = {
@@ -656,24 +671,6 @@ class Bilibili(VideoExtractor):
             self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
             self.extract(**kwargs)
             self.download(**kwargs)
-        else:
-            playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script>')
diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py
--- a/src/you_get/extractors/instagram.py
+++ b/src/you_get/extractors/instagram.py
@@ ... @@
-        data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html)
+        data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', cont)
         try:
             info = json.loads(data.group(1))
             post = info['entry_data']['PostPage'][0]
             assert post
         except:
             # with logged-in cookies
-            data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html)
+            data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont)
             if data is not None:
                 log.e('[Warning] Cookies needed.')
                 post = json.loads(data.group(1))
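The instagram hunk keeps a two-step extraction: parse the window._sharedData JSON first, and only on failure fall back to the window.__additionalDataLoaded(...) payload that logged-in pages embed. A minimal sketch of that order, assuming cont holds the fetched page HTML (extract_post is a name introduced here, not you-get's):

    import json
    import re

    def extract_post(cont):
        # Anonymous pages: the post JSON lives in window._sharedData.
        data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', cont)
        try:
            info = json.loads(data.group(1))
            post = info['entry_data']['PostPage'][0]
            assert post
            return post
        except (AttributeError, AssertionError, LookupError, ValueError):
            # Logged-in pages: the payload arrives via window.__additionalDataLoaded().
            data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont)
            return json.loads(data.group(1)) if data else None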
diff --git a/src/you_get/extractors/iwara.py b/src/you_get/extractors/iwara.py
index 67a41d41..37cd712a 100644
--- a/src/you_get/extractors/iwara.py
+++ b/src/you_get/extractors/iwara.py
@@ -27,6 +27,9 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     api_url = video_url + '/api/video/' + video_hash
     content = get_content(api_url, headers=headers)
     data = json.loads(content)
+    if len(data) < 1:
+        print('Maybe a private video? [' + title + ']')
+        return
     down_urls = 'https:' + data[0]['uri']
     type, ext, size = url_info(down_urls, headers=headers)
     print_info(site_info, title+data[0]['resolution'], type, size)
@@ -35,10 +38,8 @@
         download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)

 def download_playlist_by_url( url, **kwargs):
-    video_page = get_content(url)
-    # url_first=re.findall(r"(http[s]?://[^/]+)",url)
+    video_page = get_html(url)
     url_first=match1(url, r"(http[s]?://[^/]+)")
-    # print (url_first)
     videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page))
     if (len(videos)>0):
         for video in videos:
diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py
index 0ddcadba..05c1e650 100644
--- a/src/you_get/extractors/miaopai.py
+++ b/src/you_get/extractors/miaopai.py
@@ -19,7 +19,7 @@ fake_headers_mobile = {

 def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
     '''Source: Android mobile'''
-    page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
+    page_url = 'https://video.weibo.com/show?fid=' + fid + '&type=mp4'

     mobile_page = get_content(page_url, headers=fake_headers_mobile)
     url = match1(mobile_page, r'
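Downstream of the miaopai change, the fid flow is: build the https show-page URL, fetch it with mobile headers, then pull the stream URL out of the page. A sketch on top of you-get's own get_content/match1 helpers; the hunk above is cut off mid-pattern, so the regex and the trimmed headers dict below are stand-ins, not the project's real values:

    from you_get.common import get_content, match1

    # Trimmed stand-in for the fake_headers_mobile dict in miaopai.py.
    fake_headers_mobile = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4) AppleWebKit/537.36',
    }

    def miaopai_stream_url(fid):
        page_url = 'https://video.weibo.com/show?fid=' + fid + '&type=mp4'
        mobile_page = get_content(page_url, headers=fake_headers_mobile)
        # Placeholder pattern; the real one is truncated in the hunk above.
        return match1(mobile_page, r'<video[^>]*\ssrc="([^"]+)"')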