you-get/src/you_get/extractor/acfun.py

59 lines
2.2 KiB
Python
Raw Normal View History

2012-09-01 23:38:23 +04:00
#!/usr/bin/env python
__all__ = ['acfun_download']
from ..common import *
2012-12-01 19:14:04 +04:00
from .qq import qq_download_by_id
2013-07-14 19:34:42 +04:00
from .sina import sina_download_by_vid
2012-09-01 23:38:23 +04:00
from .tudou import tudou_download_by_iid
from .youku import youku_download_by_id
import json, re
def get_srt_json(id):
    """Fetch the comment JSON document for a video from comment.acfun.tv.

    ``id`` is interpolated into the URL with ``%s``. The return value is
    whatever ``get_html`` returns for that URL (presumably the response
    body as text -- confirm against ``common.get_html``).
    """
    return get_html('http://comment.acfun.tv/%s.json' % id)
def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    """Download an AcFun video by its AcFun id, plus its comment file.

    The video is looked up through AcFun's ``getVideoByID`` API, whose JSON
    reply names the hosting site (``vtype``) and the id on that site
    (``vid``). The actual download is delegated to that site's downloader.
    Unless ``info_only`` is set, the comment JSON for ``vid`` is then saved
    in ``output_dir`` as ``<title>.cmt.json``.

    Args:
        id: AcFun video id as a string (it is concatenated into the API URL).
        title: Title forwarded to the site downloader and used as the base
            name of the comment file. When ``None``, the comment file is
            named after ``vid`` instead.
        output_dir: Directory for the comment file; also forwarded to the
            site downloader.
        merge: Forwarded unchanged to the site downloader.
        info_only: Forwarded to the site downloader; when true, no comment
            file is written.

    Raises:
        NotImplementedError: If ``vtype`` is not one of ``sina``, ``youku``,
            ``tudou`` or ``qq``.
    """
    info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id))
    t = info['vtype']
    vid = info['vid']

    if t == 'sina':
        sina_download_by_vid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif t == 'youku':
        youku_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif t == 'tudou':
        tudou_download_by_iid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif t == 'qq':
        qq_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    else:
        raise NotImplementedError(t)

    if not info_only:
        # ``title`` defaults to None, and ``None + '.cmt.json'`` would raise
        # TypeError after the video itself has already been handled; fall
        # back to the upstream id so the comment file still gets a name.
        cmt_name = (title if title is not None else str(vid)) + '.cmt.json'
        print('Downloading %s ...' % cmt_name)
        cmt = get_srt_json(vid)
        # Write UTF-8 explicitly: without ``encoding`` the platform default
        # is used, which can fail on non-ASCII comment text.
        with open(os.path.join(output_dir, cmt_name), 'w', encoding = 'utf-8') as x:
            x.write(cmt)
2012-09-01 23:38:23 +04:00
def acfun_download(url, output_dir = '.', merge = True, info_only = False):
    """Download the video embedded in an AcFun article page.

    The page at ``url`` is fetched, the title is taken from the
    ``title-article`` heading, and the video id is taken from a
    ``[Video]...[/Video]`` (or lowercase ``[video]``) marker in the HTML.
    If such a marker is found the id is handed to ``acfun_download_by_id``;
    otherwise the id is read from the embedded ``/newflvplayer/player`` URL
    and handed to ``sina_download_by_vid``.

    Args:
        url: Page URL of the form ``http://<sub>.acfun.tv/v/ac<digits>``.
        output_dir: Forwarded unchanged to the downloader.
        merge: Forwarded unchanged to the downloader.
        info_only: Forwarded unchanged to the downloader.

    Raises:
        AssertionError: If ``url`` does not have the expected form, or no
            title could be extracted from the page.
    """
    # The dots of ".acfun.tv" are escaped so they only match a literal '.';
    # unescaped they match any character and let lookalike hosts through.
    assert re.match(r'http://[^\.]+\.acfun\.tv/v/ac(\d+)', url)
    html = get_html(url)

    title = r1(r'<h1 id="title-article" class="title"[^<>]*>([^<>]+)<', html)
    assert title
    title = unescape_html(title)
    title = escape_file_path(title)
    title = title.replace(' - AcFun.tv', '')

    id = r1(r"\[Video\](\d+)\[/Video\]", html) or r1(r"\[video\](\d+)\[/video\]", html)
    if not id:
        # No [Video] marker: take the id from the embedded player URL and
        # treat it as a Sina vid.
        # NOTE(review): if this pattern does not match either, whatever r1
        # returns for "no match" is passed on as the vid -- confirm how
        # sina_download_by_vid handles that.
        id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
    else:
        acfun_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
2012-09-01 23:38:23 +04:00
# Display name of the site this extractor handles.
site_info = "AcFun.tv"
# Generic module-level alias for the single-URL downloader
# (presumably looked up by name by the package's dispatcher -- confirm).
download = acfun_download
# Playlist entry point built by the shared playlist_not_supported helper
# (presumably a callable that reports playlists as unsupported -- confirm).
download_playlist = playlist_not_supported('acfun')