you-get/src/you_get/extractors/acfun.py

127 lines
4.5 KiB
Python
Raw Normal View History

2012-09-01 23:38:23 +04:00
#!/usr/bin/env python
__all__ = ['acfun_download']
from ..common import *
2016-03-03 23:49:47 +03:00
from .le import letvcloud_download_by_vu
2015-06-19 06:46:43 +03:00
from .qq import qq_download_by_vid
2013-07-14 19:34:42 +04:00
from .sina import sina_download_by_vid
2012-09-01 23:38:23 +04:00
from .tudou import tudou_download_by_iid
2016-04-28 08:29:22 +03:00
from .youku import youku_download_by_vid, youku_open_download_by_vid
2012-09-01 23:38:23 +04:00
2017-05-23 21:16:32 +03:00
import json
import re
import base64
2012-09-01 23:38:23 +04:00
def get_srt_json(id):
    """Fetch the danmaku (comment track) payload for an AcFun video.

    Requests ``http://danmu.aixifan.com/V2/<id>`` through ``get_content``
    and returns whatever that helper returns, without parsing it.
    """
    return get_content('http://danmu.aixifan.com/V2/%s' % id)
def youku_acfun_proxy(vid, sign):
    """Query the Youku cloud ``acfun`` proxy and collect stream URLs.

    The endpoint answers with JSON whose ``data`` field is base64 text.  The
    decoded bytes are run through ``rc4`` with a fixed key, decoded as UTF-8
    and parsed as JSON again.

    Returns a dict mapping every ``stream_type`` to a 2-tuple
    ``(urls, total_size)``.  ``urls`` is the list of segment URLs when the
    stream carries a non-empty ``segs`` entry; otherwise it is the stream's
    ``m3u8`` value, stored as-is.
    """
    api = 'http://aplay-vod.cn-beijing.aliyuncs.com/acfun/web?vid={}&ct=85&ev=2&sign={}'.format(vid, sign)
    payload = base64.b64decode(json.loads(get_content(api))['data'])
    youku_json = json.loads(rc4(b'2da3ca9e', payload).decode('utf8'))

    streams = {}
    for entry in youku_json['stream']:
        kind = entry['stream_type']
        total = entry['total_size']
        segments = entry.get('segs')
        if segments:
            links = [seg['url'] for seg in segments]
        else:
            #no segment list: fall back to the playlist reference
            links = entry['m3u8']
        streams[kind] = links, total
    return streams
2012-09-01 23:38:23 +04:00
2016-01-11 02:18:49 +03:00
def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):
    """str, str, str, bool, bool ->None

    Download Acfun video by vid.

    Call Acfun API, decide which site to use, and pass the job to its
    extractor.  Afterwards, unless ``info_only`` or the module-level
    ``dry_run`` flag is set, the danmaku JSON is written to
    ``<output_dir>/<title>.cmt.json`` when the ``caption`` keyword argument
    is truthy.  The danmaku step is best-effort and never raises.

    Raises:
        NotImplementedError: the API reports a sourceType without a branch
            here.
        KeyError: the API response lacks a field the selected branch needs
            (e.g. ``sourceId``).
        RuntimeError: sourceType is 'zhuzhan' but none of the expected
            stream types is available.
    """
    #first call the main parsing API
    info = json.loads(get_content('http://www.acfun.tv/video/getVideo.aspx?id=' + vid))
    sourceType = info['sourceType']

    #call extractor decided by sourceType. sourceId is looked up inside each
    #branch so that a response without it fails with KeyError('sourceId')
    #rather than with an unbound local variable.
    if sourceType == 'sina':
        sina_download_by_vid(info['sourceId'], title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'youku':
        youku_download_by_vid(info['sourceId'], title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
    elif sourceType == 'tudou':
        tudou_download_by_iid(info['sourceId'], title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'qq':
        qq_download_by_vid(info['sourceId'], title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'letv':
        letvcloud_download_by_vu(info['sourceId'], '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'zhuzhan':
        #As in Jul.28.2016, Acfun is using embsig to anti hotlink so we need to pass this
        #In Mar. 2017 there is a dedicated ``acfun_proxy'' in youku cloud player
        yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'])

        #pick the first available stream type, best quality first
        preferred = None
        for t in ('mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd'):
            if yk_streams.get(t):
                preferred = yk_streams[t]
                break
        if preferred is None:
            raise RuntimeError('No supported stream type found for vid %s' % vid)

        #total_size in the json could be incorrect(F.I. 0), so probe each URL
        #NOTE(review): for a stream without 'segs', youku_acfun_proxy stores
        #the stream's 'm3u8' value in preferred[0]; if that is a plain string
        #this loop would walk it character by character -- confirm upstream.
        size = 0
        for url in preferred[0]:
            _, _, seg_size = url_info(url)
            size += seg_size

        #fallback to flvhd is not quite possible
        print_info(site_info, title, 'mp4', size)
        if not info_only:
            download_urls(preferred[0], title, 'mp4', size, output_dir=output_dir, merge=merge)
    else:
        raise NotImplementedError(sourceType)

    if info_only or dry_run:
        return

    #.get(): a caller that does not pass ``caption`` gets no danmaku instead
    #of a KeyError after the video has already been downloaded
    if not kwargs.get('caption'):
        print('Skipping danmaku.')
        return

    try:
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.json'))
        cmt = get_srt_json(vid)
        with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:
            x.write(cmt)
    except Exception as e:
        #danmaku is optional: do not fail the whole download because of it,
        #but report the problem instead of hiding it. ``Exception`` (not a
        #bare except) lets KeyboardInterrupt/SystemExit propagate.
        print('Failed to download danmaku: %s' % e)
2015-01-27 18:44:45 +03:00
def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download an AcFun video given the URL of its page.

    Fetches the page, builds the title from the ``data-title`` attribute
    (plus the active part name for ``..._<n>`` URLs and the uploader's
    ``data-name`` when present), reads ``data-vid`` and hands the job to
    ``acfun_download_by_vid``.  Remaining keyword arguments are forwarded
    unchanged.

    Raises:
        AssertionError: the URL is not a recognised AcFun video URL, or the
            page carries no usable title or vid.
    """
    #dots are escaped so they match a literal '.' only
    assert re.match(r'http://[^\.]+\.acfun\.[^\.]+/\D/\D\D(\d+)', url)
    html = get_content(url)

    title = r1(r'data-title="([^"]+)"', html)
    #validate before post-processing so the helpers below get a real string
    assert title
    title = unescape_html(title)
    title = escape_file_path(title)
    assert title

    #the guards below assume r1 yields None when nothing matches -- TODO confirm
    if match1(url, r'_(\d+)$'): # current P
        part = r1(r'active">([^<]*)', html)
        if part is not None:
            title = title + " " + part

    #raw string: '\d' in a plain literal is an invalid escape sequence
    vid = r1(r'data-vid="(\d+)"', html)
    assert vid
    up = r1(r'data-name="([^"]+)"', html)
    if up is not None:
        title = title + ' - ' + up

    acfun_download_by_vid(vid, title,
                          output_dir=output_dir,
                          merge=merge,
                          info_only=info_only,
                          **kwargs)
2012-09-01 23:38:23 +04:00
2014-07-19 01:53:48 +04:00
#Display name of this site; passed to print_info() by acfun_download_by_vid
site_info = 'AcFun.tv'

#Module-level entry points (presumably looked up by name by the caller of
#this extractor module -- verify against the dispatcher)
download = acfun_download
download_playlist = playlist_not_supported('acfun')