mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 21:45:02 +03:00
Merge pull request #1376 from cnbeining/fix-bilibili-miniloader
[Bilibili] Change back to extract via interface API with Miniloader's key
This commit is contained in:
commit
a574dcb364
@ -2,17 +2,96 @@
|
||||
|
||||
__all__ = ['bilibili_download']
|
||||
|
||||
import json
|
||||
import re
|
||||
from ..common import *
|
||||
|
||||
def get_srt_xml(cid):
|
||||
return get_html('http://comment.bilibili.com/%s.xml' % cid)
|
||||
from .sina import sina_download_by_vid
|
||||
from .tudou import tudou_download_by_id
|
||||
from .youku import youku_download_by_vid
|
||||
|
||||
def bilibili_download_by_api(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
title = r1(r'cid=(\d+)', url)
|
||||
info = json.loads(get_content(url))
|
||||
urls = [i['url'] for i in info['durl']]
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
appkey = 'f3bb208b3d081dc8'
|
||||
SECRETKEY_MINILOADER = '1c15888dc316e05a15fdd0a02ed6584f'
|
||||
|
||||
def get_srt_xml(id):
|
||||
url = 'http://comment.bilibili.com/%s.xml' % id
|
||||
return get_html(url)
|
||||
|
||||
|
||||
def parse_srt_p(p):
|
||||
fields = p.split(',')
|
||||
assert len(fields) == 8, fields
|
||||
time, mode, font_size, font_color, pub_time, pool, user_id, history = fields
|
||||
time = float(time)
|
||||
|
||||
mode = int(mode)
|
||||
assert 1 <= mode <= 8
|
||||
# mode 1~3: scrolling
|
||||
# mode 4: bottom
|
||||
# mode 5: top
|
||||
# mode 6: reverse?
|
||||
# mode 7: position
|
||||
# mode 8: advanced
|
||||
|
||||
pool = int(pool)
|
||||
assert 0 <= pool <= 2
|
||||
# pool 0: normal
|
||||
# pool 1: srt
|
||||
# pool 2: special?
|
||||
|
||||
font_size = int(font_size)
|
||||
|
||||
font_color = '#%06x' % int(font_color)
|
||||
|
||||
return pool, mode, font_size, font_color
|
||||
|
||||
|
||||
def parse_srt_xml(xml):
|
||||
d = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
|
||||
for x, y in d:
|
||||
p = parse_srt_p(x)
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def parse_cid_playurl(xml):
|
||||
from xml.dom.minidom import parseString
|
||||
try:
|
||||
doc = parseString(xml.encode('utf-8'))
|
||||
urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')]
|
||||
return urls
|
||||
except:
|
||||
return []
|
||||
|
||||
|
||||
def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
|
||||
urls = []
|
||||
for cid in cids:
|
||||
sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
|
||||
url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
|
||||
urls += [i
|
||||
if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
|
||||
else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
|
||||
for i in parse_cid_playurl(get_content(url))]
|
||||
|
||||
type_ = ''
|
||||
size = 0
|
||||
for url in urls:
|
||||
_, type_, temp = url_info(url)
|
||||
size += temp
|
||||
|
||||
print_info(site_info, title, type_, size)
|
||||
if not info_only:
|
||||
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
|
||||
|
||||
|
||||
def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
|
||||
sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
|
||||
url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
|
||||
urls = [i
|
||||
if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
|
||||
else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
|
||||
for i in parse_cid_playurl(get_content(url))]
|
||||
|
||||
type_ = ''
|
||||
size = 0
|
||||
@ -27,55 +106,82 @@ def bilibili_download_by_api(url, output_dir='.', merge=True, info_only=False, *
|
||||
if not info_only:
|
||||
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
|
||||
|
||||
|
||||
def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
|
||||
api_url = 'http://live.bilibili.com/api/playurl?cid=' + cid
|
||||
urls = parse_cid_playurl(get_content(api_url))
|
||||
|
||||
for url in urls:
|
||||
_, type_, _ = url_info(url)
|
||||
size = 0
|
||||
print_info(site_info, title, type_, size)
|
||||
if not info_only:
|
||||
download_urls([url], title, type_, total_size=None, output_dir=output_dir, merge=merge)
|
||||
|
||||
|
||||
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
if re.match(r'https?://interface\.bilibili\.com/', url):
|
||||
# quick hack for explicit API
|
||||
bilibili_download_by_api(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
|
||||
return
|
||||
html = get_content(url)
|
||||
|
||||
if re.match(r'https?://bangumi\.bilibili\.com/', url):
|
||||
# quick hack for bangumi URLs
|
||||
html = get_content(url)
|
||||
url = r1(r'"([^"]+)" class="v-av-link"', html)
|
||||
html = get_content(url)
|
||||
|
||||
html = get_content(url)
|
||||
main_title = r1_of([r'<meta name="title" content="\s*([^<>]{1,999})\s*" />',
|
||||
r'<h1[^>]*>\s*([^<>]+)\s*</h1>'], html)
|
||||
cid = r1(r'cid=(\d+)', html)
|
||||
title = r1_of([r'<meta name="title" content="\s*([^<>]{1,999})\s*" />',
|
||||
r'<h1[^>]*>\s*([^<>]+)\s*</h1>'], html)
|
||||
if title:
|
||||
title = unescape_html(title)
|
||||
title = escape_file_path(title)
|
||||
|
||||
aid = r1(r'av(\d+)', url)
|
||||
page = r1(r'index_(\d+)', url)
|
||||
sub_titles = re.findall('<option value=.*>\s*([^<>]+)\s*</option>', html)
|
||||
if page is None and sub_titles: # download all
|
||||
for t in enumerate(sub_titles):
|
||||
page, sub_title = t[0] + 1, t[1]
|
||||
title = main_title + ' - ' + sub_title
|
||||
flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
|
||||
r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
|
||||
assert flashvars
|
||||
flashvars = flashvars.replace(': ', '=')
|
||||
t, cid = flashvars.split('=', 1)
|
||||
cid = cid.split('&')[0]
|
||||
if t == 'cid':
|
||||
if re.match(r'https?://live\.bilibili\.com/', url):
|
||||
title = r1(r'<title>\s*([^<>]+)\s*</title>', html)
|
||||
bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
|
||||
api = 'http://www.bilibili.com/m/html5?aid=%s&page=%s' % (aid, page)
|
||||
info = json.loads(get_content(api))
|
||||
src = info['src']
|
||||
_, type_, size = url_info(src)
|
||||
print_info(site_info, title, type_, size)
|
||||
if not info_only:
|
||||
download_urls([src], title, type_, total_size=size, output_dir=output_dir, merge=merge)
|
||||
|
||||
else: # download selected
|
||||
if page is None: page = 1
|
||||
sub_title = r1('<option value=.* selected>\s*([^<>]+)\s*</option>', html)
|
||||
if sub_title is None:
|
||||
sub_title = r1('<option value=.*>\s*([^<>]+)\s*</option>', html)
|
||||
if sub_title:
|
||||
title = main_title + ' - ' + sub_title
|
||||
else:
|
||||
title = main_title
|
||||
# multi-P
|
||||
cids = []
|
||||
pages = re.findall('<option value=\'([^\']*)\'', html)
|
||||
titles = re.findall('<option value=.*>\s*([^<>]+)\s*</option>', html)
|
||||
for i, page in enumerate(pages):
|
||||
html = get_html("http://www.bilibili.com%s" % page)
|
||||
flashvars = r1_of([r'(cid=\d+)',
|
||||
r'flashvars="([^"]+)"',
|
||||
r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
|
||||
if flashvars:
|
||||
t, cid = flashvars.split('=', 1)
|
||||
cids.append(cid.split('&')[0])
|
||||
if url.endswith(page):
|
||||
cids = [cid.split('&')[0]]
|
||||
titles = [titles[i]]
|
||||
break
|
||||
|
||||
api = 'http://www.bilibili.com/m/html5?aid=%s&page=%s' % (aid, page)
|
||||
info = json.loads(get_content(api))
|
||||
src = info['src']
|
||||
_, type_, size = url_info(src)
|
||||
print_info(site_info, title, type_, size)
|
||||
if not info_only:
|
||||
download_urls([src], title, type_, total_size=size, output_dir=output_dir, merge=merge)
|
||||
# no multi-P
|
||||
if not pages:
|
||||
cids = [cid]
|
||||
titles = [r1(r'<option value=.* selected>\s*([^<>]+)\s*</option>', html) or title]
|
||||
|
||||
for i in range(len(cids)):
|
||||
bilibili_download_by_cid(cids[i],
|
||||
titles[i],
|
||||
output_dir=output_dir,
|
||||
merge=merge,
|
||||
info_only=info_only)
|
||||
|
||||
elif t == 'vid':
|
||||
sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
elif t == 'ykid':
|
||||
youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
elif t == 'uid':
|
||||
tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
else:
|
||||
raise NotImplementedError(flashvars)
|
||||
|
||||
if not info_only and not dry_run:
|
||||
if not kwargs['caption']:
|
||||
@ -87,6 +193,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
|
||||
with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
|
||||
x.write(xml)
|
||||
|
||||
|
||||
site_info = "bilibili.com"
|
||||
download = bilibili_download
|
||||
download_playlist = bilibili_download
|
||||
download_playlist = bilibili_download
|
Loading…
Reference in New Issue
Block a user