Mirror of https://github.com/soimort/you-get.git
Synced 2025-02-02 16:24:00 +03:00
add format selection for AcFun
This commit is contained in:
parent 00e2ce3f48
commit 5c9ec6c4f3
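This change routes AcFun through you-get's generic VideoExtractor pipeline, so the qualities declared in stream_types below become selectable streams instead of a single hard-coded URL. A minimal usage sketch of what that enables, via the module-level `site` instance the diff introduces at the bottom of acfun.py (the ac-number is a placeholder; assumes you-get is installed and AcFun's page layout still matches this commit):

    # List the available formats, then download a chosen one.
    from you_get.extractors.acfun import site

    url = 'https://www.acfun.cn/v/ac1234567'  # placeholder video URL
    site.download_by_url(url, info_only=True, output_dir='.', merge=True)    # print the stream table
    site.download_by_url(url, stream_id='720P', output_dir='.', merge=True)  # pick a specific quality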
@@ -1,168 +1,213 @@
 #!/usr/bin/env python

 __all__ = ['acfun_download']

 from ..common import *
+from ..extractor import VideoExtractor
-
-from .le import letvcloud_download_by_vu
-from .qq import qq_download_by_vid
-from .sina import sina_download_by_vid
-from .tudou import tudou_download_by_iid
-from .youku import youku_download_by_vid
+
+class AcFun(VideoExtractor):
+    name = "AcFun"
-
-import json
-import re
-import base64
-import time
+
+    stream_types = [
+        {'id': '2160P', 'qualityType': '2160p'},
+        {'id': '1080P60', 'qualityType': '1080p60'},
+        {'id': '720P60', 'qualityType': '720p60'},
+        {'id': '1080P+', 'qualityType': '1080p+'},
+        {'id': '1080P', 'qualityType': '1080p'},
+        {'id': '720P', 'qualityType': '720p'},
+        {'id': '540P', 'qualityType': '540p'},
+        {'id': '360P', 'qualityType': '360p'}
+    ]
-
-def get_srt_json(id):
-    url = 'http://danmu.aixifan.com/V2/%s' % id
-    return get_content(url)
+
+    def prepare(self, **kwargs):
+        assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url)
-
-def youku_acfun_proxy(vid, sign, ref):
-    endpoint = 'http://player.acfun.cn/flash_data?vid={}&ct=85&ev=3&sign={}&time={}'
-    url = endpoint.format(vid, sign, str(int(time.time() * 1000)))
-    json_data = json.loads(get_content(url, headers=dict(referer=ref)))['data']
-    enc_text = base64.b64decode(json_data)
-    dec_text = rc4(b'8bdc7e1a', enc_text).decode('utf8')
-    youku_json = json.loads(dec_text)
+
+        if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', self.url):
+            html = get_content(self.url, headers=fake_headers)
+            json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
+            json_data = json.loads(json_text)
+            vid = json_data.get('currentVideoInfo').get('id')
+            up = json_data.get('user').get('name')
+            self.title = json_data.get('title')
+            video_list = json_data.get('videoList')
+            if len(video_list) > 1:
+                self.title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
+            currentVideoInfo = json_data.get('currentVideoInfo')
+
+        elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url):
+            html = get_content(self.url, headers=fake_headers)
+            tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
+            json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
+            json_data = json.loads(json_text)
+            self.title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
+            vid = str(json_data['videoId'])
+            up = "acfun"
+            currentVideoInfo = json_data.get('currentVideoInfo')
-
-    yk_streams = {}
-    for stream in youku_json['stream']:
-        tp = stream['stream_type']
-        yk_streams[tp] = [], stream['total_size']
-        if stream.get('segs'):
-            for seg in stream['segs']:
-                yk_streams[tp][0].append(seg['url'])
-        else:
-            yk_streams[tp] = stream['m3u8'], stream['total_size']
-            raise NotImplementedError
-
-    return yk_streams
-
-def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):
-    """str, str, str, bool, bool -> None
-
-    Download an AcFun video by vid.
-
-    Call the AcFun API, decide which site hosts the video, and pass the job
-    to that site's extractor.
-    """
-
-    # First, call the main parsing API.
-    info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid, headers=fake_headers))
-
-    sourceType = info['sourceType']
-
-    # Decide sourceId, to know which extractor to use.
-    if 'sourceId' in info: sourceId = info['sourceId']
-    # danmakuId = info['danmakuId']
-
-    # Call the extractor decided by sourceType.
-    if sourceType == 'sina':
-        sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
-    elif sourceType == 'youku':
-        youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
-    elif sourceType == 'tudou':
-        tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
-    elif sourceType == 'qq':
-        qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only)
-    elif sourceType == 'letv':
-        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
-    elif sourceType == 'zhuzhan':
-        # As of Jul. 28, 2016, AcFun uses embsig for hotlink protection, so we need to pass it.
-        # In Mar. 2017 a dedicated ``acfun_proxy'' appeared in the Youku cloud player;
-        # old code removed.
-        url = 'http://www.acfun.cn/v/ac' + vid
-        yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], url)
-        seq = ['mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd']
-        for t in seq:
-            if yk_streams.get(t):
-                preferred = yk_streams[t]
-                break
-        # total_size in the JSON could be incorrect (e.g. 0).
-        size = 0
-        for url in preferred[0]:
-            _, _, seg_size = url_info(url)
-            size += seg_size
-        # Falling back to flvhd is not quite possible.
-        if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]):
-            ext = 'flv'
-        else:
-            ext = 'mp4'
-        print_info(site_info, title, ext, size)
-        if not info_only:
-            download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge)
-    else:
-        raise NotImplementedError(sourceType)
-
-    if not info_only and not dry_run:
-        if not kwargs['caption']:
-            print('Skipping danmaku.')
-            return
-        try:
-            title = get_filename(title)
-            print('Downloading %s ...\n' % (title + '.cmt.json'))
-            cmt = get_srt_json(vid)
-            with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:
-                x.write(cmt)
-        except:
-            pass
-
-def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)
-
-    def getM3u8UrlFromCurrentVideoInfo(currentVideoInfo):
-        if 'playInfos' in currentVideoInfo:
-            return currentVideoInfo['playInfos'][0]['playUrls'][0]
-        elif 'ksPlayJson' in currentVideoInfo:
-            ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
+
+        if 'ksPlayJson' in currentVideoInfo:
+            durationMillis = currentVideoInfo['durationMillis']
+            ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
             representation = ksPlayJson.get('adaptationSet')[0].get('representation')
-            reps = []
-            for one in representation:
-                reps.append((one['width'] * one['height'], one['url'], one['backupUrl']))
-            return max(reps)[1]
+            stream_list = representation
+
+        for stream in stream_list:
+            m3u8_url = stream["url"]
+            size = durationMillis * stream["avgBitrate"] / 8
+            # size = float('inf')
+            container = 'mp4'
+            stream_id = stream["qualityLabel"]
+            quality = stream["qualityType"]
+
+            stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality)
+            self.streams[stream_id] = stream_data
+
+        assert self.title and m3u8_url
+        self.title = unescape_html(self.title)
+        self.title = escape_file_path(self.title)
+        p_title = r1('active">([^<]+)', html)
+        self.title = '%s (%s)' % (self.title, up)
+        if p_title:
+            self.title = '%s - %s' % (self.title, p_title)
-
-    if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
-        html = get_content(url, headers=fake_headers)
-        json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
-        json_data = json.loads(json_text)
-        vid = json_data.get('currentVideoInfo').get('id')
-        up = json_data.get('user').get('name')
-        title = json_data.get('title')
-        video_list = json_data.get('videoList')
-        if len(video_list) > 1:
-            title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
-        currentVideoInfo = json_data.get('currentVideoInfo')
-        m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
-    elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url):
-        html = get_content(url, headers=fake_headers)
-        tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
-        json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
-        json_data = json.loads(json_text)
-        title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
-        vid = str(json_data['videoId'])
-        up = "acfun"
+
+    def download(self, **kwargs):
+        if 'json_output' in kwargs and kwargs['json_output']:
+            json_output.output(self)
+        elif 'info_only' in kwargs and kwargs['info_only']:
+            if 'stream_id' in kwargs and kwargs['stream_id']:
+                # Display the stream
+                stream_id = kwargs['stream_id']
+                if 'index' not in kwargs:
+                    self.p(stream_id)
+                else:
+                    self.p_i(stream_id)
+            else:
+                # Display all available streams
+                if 'index' not in kwargs:
+                    self.p([])
+                else:
+                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+                    self.p_i(stream_id)
-        currentVideoInfo = json_data.get('currentVideoInfo')
-        m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
+        else:
+            if 'stream_id' in kwargs and kwargs['stream_id']:
+                # Download the stream
+                stream_id = kwargs['stream_id']
+            else:
+                stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
-    else:
-        raise NotImplementedError
-
-    assert title and m3u8_url
-    title = unescape_html(title)
-    title = escape_file_path(title)
-    p_title = r1('active">([^<]+)', html)
-    title = '%s (%s)' % (title, up)
-    if p_title:
-        title = '%s - %s' % (title, p_title)
-
-    print_info(site_info, title, 'm3u8', float('inf'))
-    if not info_only:
-        download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)
+
+            if 'index' not in kwargs:
+                self.p(stream_id)
+            else:
+                self.p_i(stream_id)
+
+            if stream_id in self.streams:
+                url = self.streams[stream_id]['src']
+                ext = self.streams[stream_id]['container']
+                total_size = self.streams[stream_id]['size']
+
+                if ext == 'm3u8' or ext == 'm4a':
+                    ext = 'mp4'
+
+                if not url:
+                    log.wtf('[Failed] Cannot extract video source.')
+                # For legacy main()
+                headers = {}
+                if self.ua is not None:
+                    headers['User-Agent'] = self.ua
+                if self.referer is not None:
+                    headers['Referer'] = self.referer
+
+                download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge'])
+
+                if 'caption' not in kwargs or not kwargs['caption']:
+                    print('Skipping captions or danmaku.')
+                    return
+
+                for lang in self.caption_tracks:
+                    filename = '%s.%s.srt' % (get_filename(self.title), lang)
+                    print('Saving %s ... ' % filename, end="", flush=True)
+                    srt = self.caption_tracks[lang]
+                    with open(os.path.join(kwargs['output_dir'], filename),
+                              'w', encoding='utf-8') as x:
+                        x.write(srt)
+                    print('Done.')
+
+                if self.danmaku is not None and not dry_run:
+                    filename = '{}.cmt.xml'.format(get_filename(self.title))
+                    print('Downloading {} ...\n'.format(filename))
+                    with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
+                        fp.write(self.danmaku)
+
+                if self.lyrics is not None and not dry_run:
+                    filename = '{}.lrc'.format(get_filename(self.title))
+                    print('Downloading {} ...\n'.format(filename))
+                    with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
+                        fp.write(self.lyrics)
+
+        # For main_dev()
+        #download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])
+        keep_obj = kwargs.get('keep_obj', False)
+        if not keep_obj:
+            self.__init__()
+
+    def acfun_download(self, url, output_dir='.', merge=True, info_only=False, **kwargs):
+        assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)
+
+        def getM3u8UrlFromCurrentVideoInfo(currentVideoInfo):
+            if 'playInfos' in currentVideoInfo:
+                return currentVideoInfo['playInfos'][0]['playUrls'][0]
+            elif 'ksPlayJson' in currentVideoInfo:
+                ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
+                representation = ksPlayJson.get('adaptationSet')[0].get('representation')
+                reps = []
+                for one in representation:
+                    reps.append((one['width'] * one['height'], one['url'], one['backupUrl']))
+                return max(reps)[1]
+
+        if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
+            html = get_content(url, headers=fake_headers)
+            json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
+            json_data = json.loads(json_text)
+            vid = json_data.get('currentVideoInfo').get('id')
+            up = json_data.get('user').get('name')
+            title = json_data.get('title')
+            video_list = json_data.get('videoList')
+            if len(video_list) > 1:
+                title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
+            currentVideoInfo = json_data.get('currentVideoInfo')
+            m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
+        elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url):
+            html = get_content(url, headers=fake_headers)
+            tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
+            json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
+            json_data = json.loads(json_text)
+            title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
+            vid = str(json_data['videoId'])
+            up = "acfun"
+
+            currentVideoInfo = json_data.get('currentVideoInfo')
+            m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
+
+        else:
+            raise NotImplementedError
+
+        assert title and m3u8_url
+        title = unescape_html(title)
+        title = escape_file_path(title)
+        p_title = r1('active">([^<]+)', html)
+        title = '%s (%s)' % (title, up)
+        if p_title:
+            title = '%s - %s' % (title, p_title)
+
+        print_info(site_info, title, 'm3u8', float('inf'))
+        if not info_only:
+            download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)
+
+site = AcFun()
 site_info = "AcFun.cn"
-download = acfun_download
+download = site.download_by_url
 download_playlist = playlist_not_supported('acfun')
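The core of the new format selection is the ksPlayJson blob inside currentVideoInfo: each entry of adaptationSet[0].representation describes one quality level, and prepare() registers every entry in self.streams keyed by its qualityLabel, estimating the file size from the average bitrate. A self-contained sketch of that mapping, with invented sample data shaped like the real payload:

    import json

    # Invented sample mirroring the shape of currentVideoInfo['ksPlayJson'].
    ks_play_json = json.dumps({
        'adaptationSet': [{
            'representation': [
                {'url': 'https://example.com/360p.m3u8', 'avgBitrate': 500,
                 'qualityLabel': '360P', 'qualityType': '360p'},
                {'url': 'https://example.com/720p.m3u8', 'avgBitrate': 2000,
                 'qualityLabel': '720P', 'qualityType': '720p'},
            ]
        }]
    })
    duration_millis = 120000  # two minutes

    streams = {}
    for rep in json.loads(ks_play_json)['adaptationSet'][0]['representation']:
        streams[rep['qualityLabel']] = dict(
            src=rep['url'],
            # ms * kbit/s / 8: the two factors of 1000 cancel, leaving bytes
            # (assuming avgBitrate is in kbit/s, as the diff's formula implies).
            size=duration_millis * rep['avgBitrate'] / 8,
            container='mp4',
            quality=rep['qualityType'],
        )

    print(sorted(streams))  # ['360P', '720P'] -- the ids a user can select

Each registered id then lines up against the class-level stream_types table, which fixes the display order from 2160P down to 360P.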