mirror of
https://github.com/soimort/you-get.git
synced 2025-01-24 14:05:01 +03:00
Merge branch 'fix-bilibili-redirect' of https://github.com/rosynirvana/you-get into rosynirvana-fix-bilibili-redirect
This commit is contained in:
commit
50d34d2d99
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from .common import match1, maybe_print, download_urls, get_filename, parse_host, set_proxy, unset_proxy
|
||||
from .common import match1, maybe_print, download_urls, get_filename, parse_host, set_proxy, unset_proxy, get_content, dry_run
|
||||
from .common import print_more_compatible as print
|
||||
from .util import log
|
||||
from . import json_output
|
||||
@ -28,6 +28,10 @@ class VideoExtractor():
|
||||
self.password_protected = False
|
||||
self.dash_streams = {}
|
||||
self.caption_tracks = {}
|
||||
self.out = False
|
||||
self.ua = None
|
||||
self.referer = None
|
||||
self.danmuku = None
|
||||
|
||||
if args:
|
||||
self.url = args[0]
|
||||
@ -39,6 +43,8 @@ class VideoExtractor():
|
||||
if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
|
||||
set_proxy(parse_host(kwargs['extractor_proxy']))
|
||||
self.prepare(**kwargs)
|
||||
if self.out:
|
||||
return
|
||||
if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
|
||||
unset_proxy()
|
||||
|
||||
@ -99,7 +105,8 @@ class VideoExtractor():
|
||||
print(" quality: %s" % stream['quality'])
|
||||
|
||||
if 'size' in stream and stream['container'].lower() != 'm3u8':
|
||||
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
|
||||
if stream['size'] != float('inf') and stream['size'] != 0:
|
||||
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
|
||||
|
||||
if 'itag' in stream:
|
||||
print(" # download-with: %s" % log.sprint("you-get --itag=%s [URL]" % stream_id, log.UNDERLINE))
|
||||
@ -202,12 +209,17 @@ class VideoExtractor():
|
||||
if not urls:
|
||||
log.wtf('[Failed] Cannot extract video source.')
|
||||
# For legacy main()
|
||||
download_urls(urls, self.title, ext, total_size,
|
||||
headers = {}
|
||||
if self.ua is not None:
|
||||
headers['User-Agent'] = self.ua
|
||||
if self.referer is not None:
|
||||
headers['Referer'] = self.referer
|
||||
download_urls(urls, self.title, ext, total_size, headers=headers,
|
||||
output_dir=kwargs['output_dir'],
|
||||
merge=kwargs['merge'],
|
||||
av=stream_id in self.dash_streams)
|
||||
if 'caption' not in kwargs or not kwargs['caption']:
|
||||
print('Skipping captions.')
|
||||
print('Skipping captions or danmuku.')
|
||||
return
|
||||
for lang in self.caption_tracks:
|
||||
filename = '%s.%s.srt' % (get_filename(self.title), lang)
|
||||
@ -217,6 +229,11 @@ class VideoExtractor():
|
||||
'w', encoding='utf-8') as x:
|
||||
x.write(srt)
|
||||
print('Done.')
|
||||
if self.danmuku is not None and not dry_run:
|
||||
filename = '{}.cmt.xml'.format(get_filename(self.title))
|
||||
print('Downloading {} ...\n'.format(filename))
|
||||
with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
|
||||
fp.write(self.danmuku)
|
||||
|
||||
# For main_dev()
|
||||
#download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])
|
||||
|
@ -2,210 +2,315 @@
|
||||
|
||||
__all__ = ['bilibili_download']
|
||||
|
||||
from ..common import *
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
import json
|
||||
import http.cookiejar
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
from xml.dom.minidom import parseString
|
||||
|
||||
from ..common import *
|
||||
from ..util.log import *
|
||||
from ..extractor import *
|
||||
|
||||
from .qq import qq_download_by_vid
|
||||
from .sina import sina_download_by_vid
|
||||
from .tudou import tudou_download_by_id
|
||||
from .youku import youku_download_by_vid
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
class Bilibili(VideoExtractor):
|
||||
name = 'Bilibili'
|
||||
live_api = 'http://live.bilibili.com/api/playurl?cid={}&otype=json'
|
||||
api_url = 'http://interface.bilibili.com/playurl?'
|
||||
bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
|
||||
|
||||
SEC1 = '1c15888dc316e05a15fdd0a02ed6584f'
|
||||
SEC2 = '9b288147e5474dd2aa67085f716c560d'
|
||||
stream_types = [
|
||||
{'id': 'hdflv'},
|
||||
{'id': 'flv'},
|
||||
{'id': 'hdmp4'},
|
||||
{'id': 'mp4'},
|
||||
{'id': 'live'}
|
||||
]
|
||||
fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)
|
||||
|
||||
appkey = 'f3bb208b3d081dc8'
|
||||
SECRETKEY_MINILOADER = '1c15888dc316e05a15fdd0a02ed6584f'
|
||||
@staticmethod
|
||||
def bilibili_stream_type(urls):
|
||||
url = urls[0]
|
||||
if 'hd.flv?' in url:
|
||||
return 'hdflv', 'flv'
|
||||
if '.flv?' in url:
|
||||
return 'flv', 'flv'
|
||||
if 'hd.mp4?' in url:
|
||||
return 'hdmp4', 'mp4'
|
||||
if '.mp4?' in url:
|
||||
return 'mp4', 'mp4'
|
||||
raise Exception('Unknown stream type')
|
||||
|
||||
def get_srt_xml(id):
    """Fetch ``http://comment.bilibili.com/<id>.xml`` via ``get_html``.

    Returns whatever ``get_html`` returns for that URL.
    """
    return get_html('http://comment.bilibili.com/%s.xml' % id)
|
||||
def api_req(self, cid, quality, bangumi):
    """Request the signed play-URL API for ``cid`` and return the response.

    The query string is built from ``cid``, ``quality`` and the current
    Unix timestamp; its MD5 (query + secret) is appended as ``sign``.
    A truthy ``bangumi`` selects the bangumi endpoint, the extra
    ``module=bangumi`` parameter and ``SEC2``; otherwise the regular
    endpoint and ``SEC1`` are used. Returns the result of ``get_content``.
    """
    timestamp = str(int(time.time()))
    if bangumi:
        template = 'cid={}&module=bangumi&player=1&quality={}&ts={}'
        secret, endpoint = self.SEC2, self.bangumi_api_url
    else:
        template = 'cid={}&player=1&quality={}&ts={}'
        secret, endpoint = self.SEC1, self.api_url

    query = template.format(cid, quality, timestamp)
    signature = hashlib.md5(bytes(query + secret, 'utf8')).hexdigest()
    return get_content(endpoint + query + '&sign=' + signature)
|
||||
|
||||
def parse_bili_xml(self, xml_str):
|
||||
urls_list = []
|
||||
total_size = 0
|
||||
doc = parseString(xml_str.encode('utf8'))
|
||||
durls = doc.getElementsByTagName('durl')
|
||||
for durl in durls:
|
||||
size = durl.getElementsByTagName('size')[0]
|
||||
total_size += int(size.firstChild.nodeValue)
|
||||
url = durl.getElementsByTagName('url')[0]
|
||||
urls_list.append(url.firstChild.nodeValue)
|
||||
stream_type, container = self.bilibili_stream_type(urls_list)
|
||||
if stream_type not in self.streams:
|
||||
self.streams[stream_type] = {}
|
||||
self.streams[stream_type]['src'] = urls_list
|
||||
self.streams[stream_type]['size'] = total_size
|
||||
self.streams[stream_type]['container'] = container
|
||||
|
||||
def download_by_vid(self, cid, bangumi, **kwargs):
    """Populate ``self.streams`` (and possibly ``self.danmuku``) for ``cid``.

    Reads ``stream_id`` and ``info_only`` from ``kwargs``. When only
    information is wanted and no stream id was given, qualities 4 down
    to 1 are queried and parsed. Otherwise a single quality is queried
    and the danmuku XML is fetched as well.
    """
    requested = kwargs.get('stream_id')

    if kwargs.get('info_only') and not requested:
        # Listing mode: probe every quality level, best first.
        for level in range(4, 0, -1):
            self.parse_bili_xml(self.api_req(cid, level, bangumi))
        return

    # An unknown stream id is treated the same as no stream id at all.
    if requested and requested in self.fmt2qlt:
        fmt = requested
    elif bangumi:
        fmt = 'hdflv'
    else:
        fmt = 'flv'

    # fmt is always a key of fmt2qlt here, so .get() cannot return None.
    level = self.fmt2qlt.get(fmt)
    self.parse_bili_xml(self.api_req(cid, level, bangumi))
    self.danmuku = get_danmuku_xml(cid)
|
||||
|
||||
def prepare(self, **kwargs):
    """Resolve the URL, fetch the page and dispatch to an entry method.

    Sets ``self.ua``, ``self.url``, ``self.referer`` and ``self.page``.
    ``self.title`` is set only when the page contains an
    ``<h1 title="...">`` heading; an optional ``subtitle`` keyword
    argument is appended to it. The (possibly rewritten) URL then selects
    ``movie_entry``, ``bangumi_entry``, ``live_entry`` or ``entry``.
    """
    self.ua = fake_headers['User-Agent']
    self.url = url_locations([self.url])[0]

    # A '#page=N' fragment selects a part, e.g.
    # http://www.bilibili.com/video/av3141144/index_2.html#page=3
    fragment = urllib.parse.urlparse(self.url).fragment
    page_hit = re.search(r'page=(\d+)', fragment) if fragment else None
    if page_hit is not None:
        aid = re.search(r'av(\d+)', self.url).group(1)
        self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, page_hit.group(1))

    self.referer = self.url
    self.page = get_content(self.url)

    # Title extraction is best-effort: a missing heading leaves self.title
    # untouched. An explicit match check replaces the former blanket
    # "except Exception: pass", so unrelated errors are no longer hidden.
    title_hit = re.search(r'<h1\s*title="([^"]+)"', self.page)
    if title_hit is not None:
        self.title = title_hit.group(1)
        if 'subtitle' in kwargs:
            self.title = '{} {}'.format(self.title, kwargs['subtitle'])

    # The movie check must come first: movie URLs also contain the
    # plain bangumi host.
    if 'bangumi.bilibili.com/movie' in self.url:
        self.movie_entry(**kwargs)
    elif 'bangumi.bilibili.com' in self.url:
        self.bangumi_entry(**kwargs)
    elif 'live.bilibili.com' in self.url:
        self.live_entry(**kwargs)
    else:
        self.entry(**kwargs)
|
||||
|
||||
def movie_entry(self, **kwargs):
    """Handle a movie page: look up its page list and use the first item.

    The ``aid`` is read from a ``var aid = '<digits>'`` statement in
    ``self.page``. The first page-list item supplies the title
    (``pagename``) and the ``cid`` passed on to ``download_by_vid``.
    """
    aid_match = re.search(r"var\s*aid\s*=\s*'(\d+)'", self.page)
    list_url = 'http://www.bilibili.com/widget/getPageList?aid={}'.format(aid_match.group(1))
    first_item = json.loads(get_content(list_url))[0]
    self.title = first_item['pagename']
    # bangumi=False: the old interface works for all free items.
    self.download_by_vid(first_item['cid'], False, **kwargs)
|
||||
|
||||
def entry(self, **kwargs):
|
||||
# tencent player
|
||||
tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page)
|
||||
if tc_flashvars:
|
||||
tc_flashvars = tc_flashvars.group(1)
|
||||
if tc_flashvars is not None:
|
||||
self.out = True
|
||||
qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only'])
|
||||
return
|
||||
|
||||
cid = re.search(r'cid=(\d+)', self.page).group(1)
|
||||
if cid is not None:
|
||||
self.download_by_vid(cid, False, **kwargs)
|
||||
else:
|
||||
# flashvars?
|
||||
flashvars = re.search(r'flashvars="([^"]+)"', self.page).group(1)
|
||||
if flashvars is None:
|
||||
raise Exception('Unsupported page {}'.format(self.url))
|
||||
param = flashvars.split('&')[0]
|
||||
t, cid = param.split('=')
|
||||
t = t.strip()
|
||||
cid = cid.strip()
|
||||
if t == 'vid':
|
||||
sina_download_by_vid(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only'])
|
||||
elif t == 'ykid':
|
||||
youku_download_by_vid(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only'])
|
||||
elif t == 'uid':
|
||||
tudou_download_by_id(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only'])
|
||||
else:
|
||||
raise NotImplementedError('Unknown flashvars {}'.format(flashvars))
|
||||
return
|
||||
|
||||
def live_entry(self, **kwargs):
    """Handle a live-room page: register a single ``live`` stream.

    The title comes from the page's ``<title>`` element and the room id
    from a ``ROOMID = <digits>`` assignment in ``self.page``. The live API
    response is parsed as JSON, and the URL of its first ``durl`` item
    becomes the only source of the ``live`` stream. The stream is
    recorded with container ``flv`` and size 0.
    """
    self.title = re.search(r'<title>([^<]+)', self.page).group(1)
    # Raw string: the previous plain literal relied on the invalid escape
    # sequences '\s' and '\d' (same pattern, but warns on newer Pythons).
    self.room_id = re.search(r'ROOMID\s*=\s*(\d+)', self.page).group(1)
    api_url = self.live_api.format(self.room_id)
    json_data = json.loads(get_content(api_url))
    urls = [json_data['durl'][0]['url']]

    self.streams['live'] = {
        'src': urls,
        'container': 'flv',
        'size': 0,
    }
|
||||
|
||||
def bangumi_entry(self, **kwargs):
    """Handle a bangumi (series) page and download one episode.

    The season id is the first run of digits in ``self.url``. The episode
    id is the URL fragment when present, otherwise the page's
    ``first_ep_id = "<digits>"`` value. The episode's ``cid`` comes from
    the ``get_source`` web API and its long title from the episode JSON.
    The title is extended with the episode's 1-based position in the
    season and its long title.

    Raises ``Exception`` when no episode id can be determined.
    """
    bangumi_id = re.search(r'(\d+)', self.url).group(1)
    bangumi_data = get_bangumi_info(bangumi_id)
    bangumi_payment = bangumi_data.get('payment')
    if bangumi_payment and bangumi_payment['price'] != '0':
        log.w("It's a paid item")
    ep_ids = collect_bangumi_epids(bangumi_data)

    frag = urllib.parse.urlparse(self.url).fragment
    if frag:
        episode_id = frag
    else:
        # Use the captured digits, not the Match object itself: the match
        # used to be passed straight into the POST data and the URL below.
        first_ep = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page)
        if first_ep is None:
            raise Exception('Cannot find an episode id in {}'.format(self.url))
        episode_id = first_ep.group(1)

    cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data=dict(episode_id=episode_id))
    cid = json.loads(cont)['result']['cid']
    cont = get_content('http://bangumi.bilibili.com/web_api/episode/{}.json'.format(episode_id))
    ep_info = json.loads(cont)['result']['currentEpisode']

    long_title = ep_info['longTitle']

    # The position is only used for the title, so an episode missing from
    # the season list must not abort the download (the former manual scan
    # ran off the end of the list with IndexError).
    try:
        position = ep_ids.index(episode_id) + 1
    except ValueError:
        self.title = '{} [{}]'.format(self.title, long_title)
    else:
        self.title = '{} [{} {}]'.format(self.title, position, long_title)

    self.download_by_vid(cid, bangumi=True, **kwargs)
|
||||
|
||||
|
||||
def parse_srt_p(p):
|
||||
fields = p.split(',')
|
||||
assert len(fields) == 8, fields
|
||||
time, mode, font_size, font_color, pub_time, pool, user_id, history = fields
|
||||
time = float(time)
|
||||
def check_oversea():
    """Report whether the player API places the client outside China.

    Fetches a fixed player-info URL and scans the response line by line
    for a ``<country>`` element. Returns True when the first such
    element's text is not '中国', and False when it is or when no
    such line exists.
    """
    response = get_content('https://interface.bilibili.com/player?id=cid:17778881')
    for row in response.split('\n'):
        pieces = row.split('>')
        # pieces[0] is '<tagname'; drop the leading '<' to get the tag.
        if pieces[0][1:] != 'country':
            continue
        return pieces[1].split('<')[0] != '中国'
    return False
|
||||
|
||||
mode = int(mode)
|
||||
assert 1 <= mode <= 8
|
||||
# mode 1~3: scrolling
|
||||
# mode 4: bottom
|
||||
# mode 5: top
|
||||
# mode 6: reverse?
|
||||
# mode 7: position
|
||||
# mode 8: advanced
|
||||
def check_sid():
    """Return True when the shared ``cookies`` hold a bilibili ``sid``.

    A cookie counts when its domain is ``.bilibili.com`` and its name is
    ``sid``. A falsy ``cookies`` object yields False.
    """
    if not cookies:
        return False
    return any(
        entry.domain == '.bilibili.com' and entry.name == 'sid'
        for entry in cookies
    )
|
||||
|
||||
pool = int(pool)
|
||||
assert 0 <= pool <= 2
|
||||
# pool 0: normal
|
||||
# pool 1: srt
|
||||
# pool 2: special?
|
||||
def fetch_sid(cid, aid):
    """Request the player-info endpoint and return the ``sid`` cookie value.

    Args:
        cid: value placed in the ``id=cid:<cid>`` query parameter.
        aid: value placed in the ``aid`` query parameter.

    Returns:
        The value of the ``sid`` cookie set for ``.bilibili.com``.

    Raises:
        RuntimeError: the response set no such cookie. The former bare
            ``raise`` also ended in RuntimeError, but only with Python's
            generic "no active exception" message.
    """
    url = 'http://interface.bilibili.com/player?id=cid:{}&aid={}'.format(cid, aid)
    jar = http.cookiejar.CookieJar()
    req = urllib.request.Request(url)
    # Open the same Request object the cookies are extracted against, and
    # close the response as soon as the cookies are read.
    with urllib.request.urlopen(req) as res:
        jar.extract_cookies(res, req)
    for c in jar:
        if c.domain == '.bilibili.com' and c.name == 'sid':
            return c.value
    raise RuntimeError('No sid cookie returned by {}'.format(url))
|
||||
|
||||
font_size = int(font_size)
|
||||
def collect_bangumi_epids(json_data):
    """Return the season's episode ids ordered by numeric episode index.

    Reads ``json_data['result']['episodes']``. Each episode's ``index``
    is converted with ``int`` for sorting, and its ``episode_id`` is
    returned unchanged.
    """
    ordered = sorted(
        json_data['result']['episodes'],
        key=lambda item: int(item['index']),
    )
    return [episode['episode_id'] for episode in ordered]
|
||||
|
||||
font_color = '#%06x' % int(font_color)
|
||||
|
||||
return pool, mode, font_size, font_color
|
||||
|
||||
|
||||
def parse_srt_xml(xml):
    """Run ``parse_srt_p`` over every ``<d p="...">`` entry, then give up.

    Conversion of the comment XML is not implemented: once every ``p``
    attribute has been passed through ``parse_srt_p`` (results are
    discarded), ``NotImplementedError`` is always raised.
    """
    for attributes, _text in re.findall(r'<d p="([^"]+)">(.*)</d>', xml):
        parse_srt_p(attributes)
    raise NotImplementedError()
|
||||
def get_bangumi_info(bangumi_id):
    """Fetch the season-info JSONP for ``bangumi_id`` and return it decoded.

    The request URL carries the current time in milliseconds as a
    trailing ``_`` parameter. The response is unwrapped by position: a
    prefix as long as ``seasonListCallback(`` and a suffix as long as
    ``);`` are cut off before JSON decoding.
    """
    millis = int(time.time() * 1000)
    request_url = ''.join((
        'http://bangumi.bilibili.com/jsonp/seasoninfo/',
        bangumi_id,
        '.ver?callback=seasonListCallback&jsonp=jsonp&_=',
        str(millis),
    ))
    wrapped = get_content(request_url)
    prefix, suffix = 'seasonListCallback(', ');'
    return json.loads(wrapped[len(prefix):-len(suffix)])
|
||||
|
||||
def get_danmuku_xml(cid):
    """Return the content of ``http://comment.bilibili.com/<cid>.xml``."""
    comment_url = 'http://comment.bilibili.com/{}.xml'.format(cid)
    return get_content(comment_url)
|
||||
|
||||
def parse_cid_playurl(xml):
|
||||
from xml.dom.minidom import parseString
|
||||
try:
|
||||
urls_list = []
|
||||
total_size = 0
|
||||
doc = parseString(xml.encode('utf-8'))
|
||||
urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')]
|
||||
return urls
|
||||
except:
|
||||
return []
|
||||
durls = doc.getElementsByTagName('durl')
|
||||
cdn_cnt = len(durls[0].getElementsByTagName('url'))
|
||||
for i in range(cdn_cnt):
|
||||
urls_list.append([])
|
||||
for durl in durls:
|
||||
size = durl.getElementsByTagName('size')[0]
|
||||
total_size += int(size.firstChild.nodeValue)
|
||||
cnt = len(durl.getElementsByTagName('url'))
|
||||
for i in range(cnt):
|
||||
u = durl.getElementsByTagName('url')[i].firstChild.nodeValue
|
||||
urls_list[i].append(u)
|
||||
return urls_list, total_size
|
||||
except Exception as e:
|
||||
log.w(e)
|
||||
return [], 0
|
||||
|
||||
def bilibili_download_playlist_by_url(url, **kwargs):
|
||||
url = url_locations([url])[0]
|
||||
# a bangumi here? possible?
|
||||
if 'live.bilibili' in url:
|
||||
site.download_by_url(url)
|
||||
elif 'bangumi.bilibili' in url:
|
||||
bangumi_id = re.search(r'(\d+)', url).group(1)
|
||||
bangumi_data = get_bangumi_info(bangumi_id)
|
||||
ep_ids = collect_bangumi_epids(bangumi_data)
|
||||
|
||||
def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
    """Resolve several cids to media URLs and handle them as one download.

    For each cid a signed miniplay play-URL request is made and the
    returned URLs are collected; URLs on a ``*.qqvideo.tc.qq.com`` host
    are rewritten to ``http://vsrc.store.qq.com``. Sizes reported by
    ``url_info`` are summed for ``print_info``; the download itself runs
    only when ``info_only`` is false.
    """
    qq_host = r'.*\.qqvideo\.tc\.qq\.com'
    urls = []
    for cid in cids:
        to_sign = 'cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER)
        sign_this = hashlib.md5(bytes(to_sign, 'utf-8')).hexdigest()
        api_url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
        for candidate in parse_cid_playurl(get_content(api_url)):
            if re.match(qq_host, candidate):
                candidate = re.sub(qq_host, 'http://vsrc.store.qq.com', candidate)
            urls.append(candidate)

    # type_ ends up as the type reported for the last URL ('' if none).
    type_ = ''
    size = 0
    for media_url in urls:
        _, type_, part_size = url_info(media_url)
        size += part_size

    print_info(site_info, title, type_, size)
    if info_only:
        return
    download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, headers={'Referer': 'http://www.bilibili.com/'})
|
||||
|
||||
|
||||
def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
    """Resolve one cid to media URLs and download them, retrying on timeout.

    The whole resolve/print/download sequence is restarted every time
    ``socket.timeout`` is raised; there is no retry limit. URLs on a
    ``*.qqvideo.tc.qq.com`` host are rewritten to
    ``http://vsrc.store.qq.com``. The download runs only when
    ``info_only`` is false.
    """
    qq_host = r'.*\.qqvideo\.tc\.qq\.com'
    while True:
        try:
            to_sign = 'cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER)
            sign_this = hashlib.md5(bytes(to_sign, 'utf-8')).hexdigest()
            api_url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this

            urls = []
            for candidate in parse_cid_playurl(get_content(api_url)):
                if re.match(qq_host, candidate):
                    candidate = re.sub(qq_host, 'http://vsrc.store.qq.com', candidate)
                urls.append(candidate)

            # type_ ends up as the type reported for the last URL.
            type_ = ''
            size = 0
            for media_url in urls:
                _, type_, part_size = url_info(media_url, headers={'Referer': 'http://www.bilibili.com/'})
                # A falsy size (e.g. None) counts as zero.
                size += part_size or 0

            print_info(site_info, title, type_, size)
            if not info_only:
                download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, timeout=1, headers={'Referer': 'http://www.bilibili.com/'})
        except socket.timeout:
            # Start over from the API request.
            continue
        return
|
||||
|
||||
|
||||
def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
    """Print info for, and optionally download, each live URL of ``cid``.

    Every URL from the live play-URL API is handled separately: its type
    comes from ``url_info``, the size is reported as 0, and the URL is
    downloaded on its own unless ``info_only`` is true.
    """
    playurl_xml = get_content('http://live.bilibili.com/api/playurl?cid=' + cid)
    for stream_url in parse_cid_playurl(playurl_xml):
        _mime, type_, _size = url_info(stream_url)
        print_info(site_info, title, type_, 0)
        if info_only:
            continue
        download_urls([stream_url], title, type_, total_size=None, output_dir=output_dir, merge=merge)
|
||||
|
||||
|
||||
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
html = get_content(url)
|
||||
|
||||
title = r1_of([r'<meta name="title" content="\s*([^<>]{1,999})\s*" />',
|
||||
r'<h1[^>]*>\s*([^<>]+)\s*</h1>'], html)
|
||||
if title:
|
||||
title = unescape_html(title)
|
||||
title = escape_file_path(title)
|
||||
|
||||
if re.match(r'https?://bangumi\.bilibili\.com/', url):
|
||||
# quick hack for bangumi URLs
|
||||
episode_id = r1(r'#(\d+)$', url) or r1(r'first_ep_id = "(\d+)"', html)
|
||||
cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
|
||||
post_data={'episode_id': episode_id})
|
||||
cid = json.loads(cont)['result']['cid']
|
||||
title = '%s [%s]' % (title, episode_id)
|
||||
bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
|
||||
base_url = url.split('#')[0]
|
||||
for ep_id in ep_ids:
|
||||
ep_url = '#'.join([base_url, ep_id])
|
||||
Bilibili().download_by_url(ep_url, **kwargs)
|
||||
else:
|
||||
flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
|
||||
r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
|
||||
assert flashvars
|
||||
flashvars = flashvars.replace(': ', '=')
|
||||
t, cid = flashvars.split('=', 1)
|
||||
cid = cid.split('&')[0]
|
||||
if t == 'cid':
|
||||
if re.match(r'https?://live\.bilibili\.com/', url):
|
||||
title = r1(r'<title>\s*([^<>]+)\s*</title>', html)
|
||||
bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
aid = re.search(r'av(\d+)', url).group(1)
|
||||
page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid)))
|
||||
page_cnt = len(page_list)
|
||||
for no in range(1, page_cnt+1):
|
||||
page_url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, no)
|
||||
subtitle = page_list[no-1]['pagename']
|
||||
Bilibili().download_by_url(page_url, subtitle=subtitle, **kwargs)
|
||||
|
||||
else:
|
||||
# multi-P
|
||||
cids = []
|
||||
pages = re.findall('<option value=\'([^\']*)\'', html)
|
||||
titles = re.findall('<option value=.*>\s*([^<>]+)\s*</option>', html)
|
||||
for i, page in enumerate(pages):
|
||||
html = get_html("http://www.bilibili.com%s" % page)
|
||||
flashvars = r1_of([r'(cid=\d+)',
|
||||
r'flashvars="([^"]+)"',
|
||||
r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
|
||||
if flashvars:
|
||||
t, cid = flashvars.split('=', 1)
|
||||
cids.append(cid.split('&')[0])
|
||||
if url.endswith(page):
|
||||
cids = [cid.split('&')[0]]
|
||||
titles = [titles[i]]
|
||||
break
|
||||
site = Bilibili()
|
||||
download = site.download_by_url
|
||||
download_playlist = bilibili_download_playlist_by_url
|
||||
|
||||
# no multi-P
|
||||
if not pages:
|
||||
cids = [cid]
|
||||
titles = [r1(r'<option value=.* selected>\s*([^<>]+)\s*</option>', html) or title]
|
||||
for i in range(len(cids)):
|
||||
completeTitle=None
|
||||
if (title == titles[i]):
|
||||
completeTitle=title
|
||||
else:
|
||||
completeTitle=title+"-"+titles[i]#Build Better Title
|
||||
bilibili_download_by_cid(cids[i],
|
||||
completeTitle,
|
||||
output_dir=output_dir,
|
||||
merge=merge,
|
||||
info_only=info_only)
|
||||
|
||||
elif t == 'vid':
|
||||
sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
elif t == 'ykid':
|
||||
youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
elif t == 'uid':
|
||||
tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
else:
|
||||
raise NotImplementedError(flashvars)
|
||||
|
||||
if not info_only and not dry_run:
|
||||
if not kwargs['caption']:
|
||||
print('Skipping danmaku.')
|
||||
return
|
||||
title = get_filename(title)
|
||||
print('Downloading %s ...\n' % (title + '.cmt.xml'))
|
||||
xml = get_srt_xml(cid)
|
||||
with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
|
||||
x.write(xml)
|
||||
|
||||
|
||||
site_info = "bilibili.com"
|
||||
download = bilibili_download
|
||||
download_playlist = bilibili_download
|
||||
bilibili_download = download
|
||||
|
Loading…
Reference in New Issue
Block a user