# Patch for src/you_get/extractors/bilibili.py (file mode 100644 -> 100755).
#
# Layout note: this patch text was restored to one diff line per text line.
# Indentation and the position of blank context lines are inferred from Python
# syntax and from the line counts in each @@ hunk header; tokens are unchanged.
#
# What the hunks do:
#   * Drop the 'vc' stream type, the 'vc.bilibili.com' dispatch branch and
#     the vc_entry() method.
#   * bangumi_entry(): also read an id from the varseason_id="<digits>" match
#     in the whitespace-stripped self.page and use it when it differs from the
#     number found in the URL; re-enable collect_bangumi_epids() and the
#     get_source POST that yields cid; build the title from the episode's
#     1-based position in ep_ids instead of ep_info['indexTitle'].
#   * collect_bangumi_epids(): read json_data['result']['episodes'] and order
#     the episodes by float(item['index']) instead of reversing the list.
#   * get_bangumi_info(): return the whole decoded JSON object rather than
#     json_data['result'].
#
# NOTE(review): bangumi_entry() still calls bangumi_data.get('payment') on the
#   value returned by get_bangumi_info(), which after this patch is the
#   top-level object rather than its 'result' member -- confirm the paid-item
#   warning can still trigger.
# NOTE(review): in the else branch episode_id is the re.search() result itself
#   (no .group(1)); it is then compared with ep_ids entries and formatted into
#   the request URL -- verify this is intended.
# NOTE(review): `while ep_ids[idx] != episode_id` has no upper bound; if no
#   entry compares equal, indexing past the end of the list raises IndexError.
# NOTE(review): "\s" in the added re.sub() call is not a raw string, and `aid`
#   is assigned but not used in the lines visible in this hunk.
# NOTE(review): the mode change makes a library module executable -- confirm
#   it is deliberate.
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
old mode 100644
new mode 100755
index 4b801e62..88dcfb03
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -33,8 +33,7 @@ class Bilibili(VideoExtractor):
         {'id': 'flv'},
         {'id': 'hdmp4'},
         {'id': 'mp4'},
-        {'id': 'live'},
-        {'id': 'vc'}
+        {'id': 'live'}
     ]
 
     fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)
@@ -128,8 +127,6 @@ class Bilibili(VideoExtractor):
             self.bangumi_entry(**kwargs)
         elif 'live.bilibili.com' in self.url:
             self.live_entry(**kwargs)
-        elif 'vc.bilibili.com' in self.url:
-            self.vc_entry(**kwargs)
         else:
             self.entry(**kwargs)
 
@@ -185,48 +182,37 @@ class Bilibili(VideoExtractor):
         self.streams['live']['container'] = 'flv'
         self.streams['live']['size'] = 0
 
-    def vc_entry(self, **kwargs):
-        vc_id = re.search(r'video/(\d+)', self.url)
-        if not vc_id:
-            vc_id = re.search(r'vcdetail\?vc=(\d+)', self.url)
-            if not vc_id:
-                log.wtf('Unknown url pattern')
-        endpoint = 'http://api.vc.bilibili.com/clip/v1/video/detail?video_id={}&need_playurl=1'.format(vc_id.group(1))
-        vc_meta = json.loads(get_content(endpoint, headers=fake_headers))
-        if vc_meta['code'] != 0:
-            log.wtf('{}\n{}'.format(vc_meta['msg'], vc_meta['message']))
-        item = vc_meta['data']['item']
-        self.title = item['description']
-
-        self.streams['vc'] = {}
-        self.streams['vc']['src'] = [item['video_playurl']]
-        self.streams['vc']['container'] = 'mp4'
-        self.streams['vc']['size'] = int(item['video_size'])
-
     def bangumi_entry(self, **kwargs):
         bangumi_id = re.search(r'(\d+)', self.url).group(1)
+        tmp_bangumi_id = re.search(r"varseason_id=\"(\d+)\"",re.sub("\s","",self.page)).group(1)
+
+        if int(bangumi_id) != int(tmp_bangumi_id):
+            bangumi_id = tmp_bangumi_id
+
         bangumi_data = get_bangumi_info(bangumi_id)
         bangumi_payment = bangumi_data.get('payment')
         if bangumi_payment and bangumi_payment['price'] != '0':
             log.w("It's a paid item")
-        # ep_ids = collect_bangumi_epids(bangumi_data)
+        ep_ids = collect_bangumi_epids(bangumi_data)
 
         frag = urllib.parse.urlparse(self.url).fragment
         if frag:
             episode_id = frag
         else:
             episode_id = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page)
-        # cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data=dict(episode_id=episode_id))
-        # cid = json.loads(cont)['result']['cid']
+        cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data=dict(episode_id=episode_id))
+        cid = json.loads(cont)['result']['cid']
         cont = get_content('http://bangumi.bilibili.com/web_api/episode/{}.json'.format(episode_id))
         ep_info = json.loads(cont)['result']['currentEpisode']
 
-        index_title = ep_info['indexTitle']
-        long_title = ep_info['longTitle'].strip()
-        cid = ep_info['danmaku']
+        long_title = ep_info['longTitle']
+        aid = ep_info['avId']
 
-        self.title = '{} [{} {}]'.format(self.title, index_title, long_title)
-        print(self.title)
+        idx = 0
+        while ep_ids[idx] != episode_id:
+            idx += 1
+
+        self.title = '{} [{} {}]'.format(self.title, idx+1, long_title)
         self.download_by_vid(cid, bangumi=True, **kwargs)
 
 
@@ -263,8 +249,12 @@ def fetch_sid(cid, aid):
         raise
 
 def collect_bangumi_epids(json_data):
-    eps = json_data['episodes'][::-1]
-    return [ep['episode_id'] for ep in eps]
+    eps = json_data['result']['episodes']
+    eps = sorted(eps, key=lambda item: float(item['index']))
+    result = []
+    for ep in eps:
+        result.append(ep['episode_id'])
+    return result
 
 def get_bangumi_info(bangumi_id):
     BASE_URL = 'http://bangumi.bilibili.com/jsonp/seasoninfo/'
@@ -274,7 +264,7 @@ def get_bangumi_info(bangumi_id):
     season_data = season_data[len('seasonListCallback('):]
     season_data = season_data[: -1 * len(');')]
     json_data = json.loads(season_data)
-    return json_data['result']
+    return json_data
 
 def get_danmuku_xml(cid):
     return get_content('http://comment.bilibili.com/{}.xml'.format(cid))