you-get/src/you_get/extractors/acfun.py

176 lines
7.1 KiB
Python
Raw Normal View History

2012-09-01 23:38:23 +04:00
#!/usr/bin/env python
__all__ = ['acfun_download']
from ..common import *
2016-03-03 23:49:47 +03:00
from .le import letvcloud_download_by_vu
2015-06-19 06:46:43 +03:00
from .qq import qq_download_by_vid
2013-07-14 19:34:42 +04:00
from .sina import sina_download_by_vid
2012-09-01 23:38:23 +04:00
from .tudou import tudou_download_by_iid
2017-08-10 23:43:54 +03:00
from .youku import youku_download_by_vid
2012-09-01 23:38:23 +04:00
2017-05-23 21:16:32 +03:00
import json
import re
import base64
import time
2012-09-01 23:38:23 +04:00
def get_srt_json(id):
    """Fetch the danmaku (comment) data for an AcFun video.

    Requests ``http://danmu.aixifan.com/V2/<id>`` through ``get_content``
    and returns the response exactly as ``get_content`` delivers it; no
    parsing is done here.
    """
    return get_content('http://danmu.aixifan.com/V2/%s' % id)
def youku_acfun_proxy(vid, sign, ref):
    """Query the AcFun player proxy and return the streams it advertises.

    The ``flash_data`` endpoint is requested with *vid*, *sign* and the
    current time in milliseconds, sending *ref* as the ``referer`` header.
    The ``data`` field of the JSON reply is base64-decoded, passed through
    ``rc4`` with a fixed key, decoded as UTF-8 and parsed as JSON again.

    Returns a dict mapping each ``stream_type`` to a 2-tuple
    ``(source, total_size)``. ``source`` is the list of segment URLs when
    the stream carries a non-empty ``segs`` list, and the stream's ``m3u8``
    value otherwise.
    """
    timestamp_ms = str(int(time.time() * 1000))
    request_url = 'http://player.acfun.cn/flash_data?vid={}&ct=85&ev=3&sign={}&time={}'.format(
        vid, sign, timestamp_ms)
    reply = json.loads(get_content(request_url, headers=dict(referer=ref)))
    cipher_text = base64.b64decode(reply['data'])
    plain_text = rc4(b'8bdc7e1a', cipher_text).decode('utf8')

    streams_by_type = {}
    for entry in json.loads(plain_text)['stream']:
        segments = entry.get('segs')
        if segments:
            source = [segment['url'] for segment in segments]
        else:
            # No segment list: fall back to the playlist value.
            source = entry['m3u8']
        streams_by_type[entry['stream_type']] = source, entry['total_size']
    return streams_by_type
2012-09-01 23:38:23 +04:00
2016-01-11 02:18:49 +03:00
def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download an AcFun video identified by its vid.

    Calls the AcFun ``getVideo.aspx`` API, reads ``sourceType`` from the
    reply to decide which site hosts the media, and hands the job to that
    site's extractor. For the ``zhuzhan`` source type the streams are
    resolved through ``youku_acfun_proxy`` and downloaded here.

    After the video step, unless ``info_only`` or the module-level
    ``dry_run`` flag is set, the danmaku file ``<title>.cmt.json`` is
    written to ``output_dir`` on a best-effort basis.

    Args:
        vid: AcFun video id (str); appended to the API and page URLs.
        title: Title used for display and for output file names.
        output_dir: Directory that receives the downloaded files.
        merge: Forwarded to the extractor / ``download_urls``.
        info_only: When true, only print stream information.
        **kwargs: ``caption`` (truthy to download danmaku; a missing key is
            treated as false). All kwargs are forwarded to the youku
            extractor only.

    Raises:
        NotImplementedError: If ``sourceType`` is not one of the handled values.
        RuntimeError: If a ``zhuzhan`` video exposes none of the known
            stream types.
    """
    # First call the main parsing API.
    info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid, headers=fake_headers))
    sourceType = info['sourceType']

    # The reply does not always carry sourceId; bind the name either way so
    # a missing key cannot surface later as an UnboundLocalError.
    sourceId = info.get('sourceId')

    # Call the extractor selected by sourceType.
    if sourceType == 'sina':
        sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'youku':
        youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
    elif sourceType == 'tudou':
        tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'qq':
        qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'letv':
        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'zhuzhan':
        # As of Jul. 28, 2016 AcFun uses embsig against hotlinking, so the
        # signature has to be passed along.
        # Since Mar. 2017 there is a dedicated ``acfun_proxy'' in the youku
        # cloud player; the older code path was removed.
        page_url = 'http://www.acfun.cn/v/ac' + vid
        yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], page_url)

        # Pick the first available stream type, best quality first.
        preferred = next(
            (yk_streams[stream_type]
             for stream_type in ('mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd')
             if yk_streams.get(stream_type)),
            None,
        )
        if preferred is None:
            raise RuntimeError('No supported stream type found for ac%s' % vid)

        # NOTE(review): youku_acfun_proxy stores the stream's m3u8 value
        # instead of a URL list when a stream has no segs; that shape is not
        # handled below -- confirm whether it occurs in practice.
        segment_urls = preferred[0]

        # total_size in the JSON can be incorrect (e.g. 0), so sum the
        # sizes reported for the individual segments instead.
        size = sum(url_info(segment_url)[2] for segment_url in segment_urls)

        # Falling back to flvhd is unlikely, but detect the container anyway.
        if re.search(r'fid=[0-9A-Z\-]*.flv', segment_urls[0]):
            ext = 'flv'
        else:
            ext = 'mp4'
        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls(segment_urls, title, ext, size, output_dir=output_dir, merge=merge)
    else:
        raise NotImplementedError(sourceType)

    if info_only or dry_run:
        return
    if not kwargs.get('caption'):
        print('Skipping danmaku.')
        return
    try:
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.json'))
        cmt = get_srt_json(vid)
        with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:
            x.write(cmt)
    except Exception as exc:
        # Danmaku is optional: report the failure but do not fail the
        # download. KeyboardInterrupt/SystemExit are no longer swallowed.
        print('Failed to download danmaku: %s' % exc)
2015-01-27 18:44:45 +03:00
def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download an AcFun video or bangumi episode from its page URL.

    Two page layouts are handled:

    * ``/<x>/<xx><digits>`` video pages: the ``videoInfo`` JSON embedded in
      the page supplies title, uploader and the stream URL (from
      ``playInfos`` or, failing that, the largest ``ksPlayJson``
      representation by width * height).
    * ``/bangumi/ab<digits>`` pages: the ``window.pageInfo`` JSON supplies
      the title and video id; the ``m3u8Auto`` playInfo API is then queried
      and the variant with the highest ``BANDWIDTH`` is chosen.

    The resolved stream is downloaded with ``download_url_ffmpeg`` unless
    ``info_only`` is true.

    Args:
        url: Page URL on an acfun domain.
        output_dir: Directory that receives the output file.
        merge: Forwarded to ``download_url_ffmpeg``.
        info_only: When true, only print stream information.
        **kwargs: Accepted for interface compatibility; not used here.

    Raises:
        AssertionError: If *url* does not look like an AcFun page URL, or
            if no title / stream URL could be determined.
        NotImplementedError: If *url* passes the initial check but matches
            neither supported layout.
    """
    assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)

    # Bound up front so a page with neither playInfos nor ksPlayJson fails
    # the assertion below instead of raising UnboundLocalError.
    m3u8_url = None

    if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
        html = get_content(url, headers=fake_headers)
        json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
        json_data = json.loads(json_text)
        currentVideoInfo = json_data.get('currentVideoInfo')
        vid = currentVideoInfo.get('id')
        up = json_data.get('user').get('name')
        title = json_data.get('title')
        video_list = json_data.get('videoList')
        if len(video_list) > 1:
            # Multi-part video: append the title of the current part, if
            # the part list actually contains it.
            part_title = next((p.get('title') for p in video_list if p.get('id') == vid), None)
            if part_title:
                title += " - " + part_title
        if 'playInfos' in currentVideoInfo:
            m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0]
        elif 'ksPlayJson' in currentVideoInfo:
            ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
            representation = ksPlayJson.get('adaptationSet').get('representation')
            # Largest frame area wins; ties are broken by URL. backupUrl is
            # deliberately left out of the comparison key.
            best = max(representation, key=lambda one: (one['width'] * one['height'], one['url']))
            m3u8_url = best['url']
    elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
        html = get_content(url, headers=fake_headers)
        tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>')
        json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
        json_data = json.loads(json_text)
        title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
        vid = str(json_data['videoId'])
        up = "acfun"

        play_info = get_content("https://www.acfun.cn/rest/pc-direct/play/playInfo/m3u8Auto?videoId=" + vid, headers=fake_headers)
        play_url = json.loads(play_info)['playInfo']['streams'][0]['playUrls'][0]
        m3u8_all_qualities_file = get_content(play_url)
        m3u8_all_qualities_lines = m3u8_all_qualities_file.split('#EXT-X-STREAM-INF:')[1:]
        # max() keeps the first entry among equal bandwidths, like the
        # strict ">" comparison it replaces.
        highest_quality_line = max(
            m3u8_all_qualities_lines,
            key=lambda entry: int(match1(entry, r'BANDWIDTH=(\d+)')),
        )
        # TODO: the quality should be selectable by the user.
        m3u8_url = match1(highest_quality_line, r'\n([^#\n]+)$')
        # The variant URI is joined onto the directory of the master playlist URL.
        m3u8_url = play_url[:play_url.rfind("/")+1] + m3u8_url
    else:
        # NotImplemented is a comparison sentinel, not an exception;
        # raising it produced a TypeError.
        raise NotImplementedError(url)

    assert title and m3u8_url
    title = unescape_html(title)
    title = escape_file_path(title)
    p_title = r1('active">([^<]+)', html)
    title = '%s (%s)' % (title, up)
    if p_title:
        title = '%s - %s' % (title, p_title)

    # The size is not known for an m3u8 stream, so it is reported as infinite.
    print_info(site_info, title, 'm3u8', float('inf'))
    if not info_only:
        download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)
2012-09-01 23:38:23 +04:00
2018-11-22 08:45:00 +03:00
2019-08-10 14:31:29 +03:00
# Site name handed to print_info() by the functions in this module.
site_info = "AcFun.cn"

# Module-level aliases: `download` handles a single page URL, while
# `download_playlist` is whatever playlist_not_supported('acfun') returns.
# NOTE(review): presumably these are the names the extractor loader looks
# up -- confirm against the caller.
download = acfun_download
download_playlist = playlist_not_supported('acfun')