you-get/src/you_get/extractors/acfun.py

135 lines
5.2 KiB
Python
Raw Normal View History

2012-09-01 23:38:23 +04:00
#!/usr/bin/env python
__all__ = ['acfun_download']
from ..common import *
2014-10-31 12:09:12 +03:00
from .letv import letvcloud_download_by_vu
2012-12-01 19:14:04 +04:00
from .qq import qq_download_by_id
2013-07-14 19:34:42 +04:00
from .sina import sina_download_by_vid
2012-09-01 23:38:23 +04:00
from .tudou import tudou_download_by_iid
2014-06-24 05:59:47 +04:00
from .youku import youku_download_by_vid
2012-09-01 23:38:23 +04:00
import json, re
def get_srt_json(id):
    """Fetch the comment data for ``id`` from the static comment host.

    Returns whatever ``get_html`` returns for the request, unchanged.
    """
    # Earlier endpoint, kept for reference:
    #   'http://comment.acfun.tv/%s.json' % id
    return get_html('http://static.comment.acfun.mm111.net/%s' % id)
def get_srt_lock_json(id):
    """Fetch the ``<id>_lock.json`` comment file from comment.acfun.tv.

    Returns whatever ``get_html`` returns for the request, unchanged.
    """
    return get_html('http://comment.acfun.tv/%s_lock.json' % id)
2015-01-27 18:41:17 +03:00
# def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
# info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid))
# sourceType = info['sourceType']
# sourceId = info['sourceId']
# # danmakuId = info['danmakuId']
# if sourceType == 'sina':
# sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
# elif sourceType == 'youku':
# youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
# elif sourceType == 'tudou':
# tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
# elif sourceType == 'qq':
# qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
# elif sourceType == 'letv':
# letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
# else:
# raise NotImplementedError(sourceType)
# if not info_only:
# title = get_filename(title)
# try:
# print('Downloading %s ...\n' % (title + '.cmt.json'))
# cmt = get_srt_json(vid)
# with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x:
# x.write(cmt)
# # print('Downloading %s ...\n' % (title + '.cmt_lock.json'))
# # cmt = get_srt_lock_json(danmakuId)
# # with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x:
# # x.write(cmt)
# except:
# pass
# Decompiled from the player SWF:
# protected static const VIDEO_PARSE_API:String = "http://jiexi.acfun.info/index.php?vid=";
# protected static var VIDEO_RATES_CODE:Array = ["C40","C30","C20","C10"];
# public static var VIDEO_RATES_STRING:Array = ["原画","超清","高清","流畅"];
# (labels: original quality / super HD / HD / smooth -- presumably in the
# same order as the codes above)
# A C80 stream sometimes appears as well, but its size can be smaller than C30's.
2015-01-27 18:44:45 +03:00
2015-01-27 18:41:17 +03:00
def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
    """Resolve an AcFun video id through the parse API and download it.

    Queries ``http://jiexi.acfun.info/index.php?vid=<vid>``, selects one of
    the streams listed in the response, prints the stream information and,
    unless ``info_only`` is true, downloads the video segments followed by
    the comment file ``<title>.cmt.json``.

    Args:
        vid: Video id sent to the parse API.
        title: Title used for display and for the output file names.
        output_dir: Directory the files are written to.
        merge: Passed through to ``download_urls``.
        info_only: When true, only print information; download nothing.
        **kwargs: ``stream_id`` may name the stream to use (e.g. ``"C30"``).
            It is honoured only when the API lists that stream; otherwise
            the available streams are printed and a default is chosen.

    Raises:
        AssertionError: If the API response does not report success.
    """
    # API example: http://jiexi.acfun.info/index.php?vid=1122870
    info = json.loads(get_content("http://jiexi.acfun.info/index.php?vid={}".format(vid)))
    assert info["code"] == 200, "AcFun parse API returned code %r" % (info["code"],)
    assert info["success"] == True, "AcFun parse API reported failure"

    streams = info["result"]
    # Stream ids are a letter followed by a number ("C10", "C30", ...);
    # order them by the numeric part, lowest first.
    support_types = sorted(streams.keys(), key=lambda code: int(code[1:]))

    requested = kwargs.get("stream_id")
    if requested in support_types:
        stream_id = requested
    else:
        print("Current Video Supports:")
        for code in support_types:
            stream = streams[code]
            if stream["totalbytes"] != 0:
                print("\t--format", code, "<URL>:", stream["quality"], "size:",
                      "%.2f" % (stream["totalbytes"] / 1024.0 / 1024.0), "MB")
            else:
                print("\t--format", code, "<URL>:", stream["quality"])
        # C80 is not reliably the best stream, so default to the
        # highest-numbered stream other than C80.  Only when C80 is the sole
        # stream is it used; indexing [-2] there would raise IndexError.
        preferred = [code for code in support_types if code != "C80"]
        stream_id = preferred[-1] if preferred else support_types[-1]

    files = streams[stream_id]["files"]
    # Every entry carries its position in "no"; place the URLs in that order.
    urls = [None] * len(files)
    for part in files:
        urls[part["no"]] = part["url"]
    ext = files[0]["type"]

    # The total size is the sum of the sizes reported for each segment.
    size = 0
    for segment_url in urls:
        _, _, segment_size = url_info(segment_url)
        size += segment_size

    print_info(site_info, title, ext, size)
    print("Format: ", stream_id)
    print()

    if not info_only:
        download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)

        title = get_filename(title)
        cmt_name = title + '.cmt.json'
        # The comment file is a best-effort extra: report a failure instead
        # of aborting after the video itself has been downloaded.  Catching
        # Exception (not a bare except) lets Ctrl-C still interrupt.
        try:
            print('Downloading %s ...\n' % cmt_name)
            cmt = get_srt_json(vid)
            # Comment text is frequently non-ASCII; do not depend on the
            # locale's default encoding.
            with open(os.path.join(output_dir, cmt_name), 'w', encoding='utf-8') as x:
                x.write(cmt)
        except Exception as e:
            print('Failed to download %s: %s' % (cmt_name, e))
2015-01-27 18:44:45 +03:00
2015-01-27 18:41:17 +03:00
def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every video part found on an AcFun video page.

    The page title is combined with each part's own title and every part is
    handed to ``acfun_download_by_vid``.  When the page lists no parts, the
    legacy embedded-player id is looked up and passed to
    ``sina_download_by_vid`` instead.

    Args:
        url: Page URL; must look like ``http://<sub>.acfun.<tld>/v/ac<digits>``.
        output_dir: Directory the files are written to.
        merge: Passed through to the per-video downloader.
        info_only: When true, only print information; download nothing.
        **kwargs: Forwarded to ``acfun_download_by_vid`` (e.g. ``stream_id``).

    Raises:
        AssertionError: If the URL does not match or no title is found.
    """
    # Dots are escaped so that they match a literal '.' only.
    assert re.match(r'http://[^\.]+\.acfun\.[^\.]+/\D/\D\D(\d+)', url)
    html = get_html(url)

    title = r1(r'<h1 id="txt-title-view">([^<>]+)<', html)
    # Check before post-processing so that a missing title fails here rather
    # than inside unescape_html (assumes r1 yields a falsy value on no match).
    assert title, "could not find the video title on the page"
    title = unescape_html(title)
    title = escape_file_path(title)
    assert title

    # Raw string: '\d' is not a valid escape in an ordinary string literal.
    videos = re.findall(r'data-vid="(\d+)".*href="[^"]+".*title="([^"]+)"', html)
    # re.findall returns a list (possibly empty) and never None, so test for
    # emptiness; an "is not None" check would make the fallback unreachable.
    if videos:
        for p_vid, part_name in videos:
            p_title = title + " - " + part_name
            acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge,
                                  info_only=info_only, **kwargs)
    else:
        # Legacy pages embedding the old flash player - to be removed?
        sina_id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
        if sina_id:
            sina_download_by_vid(sina_id, title, output_dir=output_dir, merge=merge,
                                 info_only=info_only)
        else:
            print("No video found on %s" % url)
2012-09-01 23:38:23 +04:00
2014-07-19 01:53:48 +04:00
# Site label passed to print_info() by acfun_download_by_vid.
site_info = "AcFun.tv"
2012-09-01 23:38:23 +04:00
# Module-level aliases (presumably the names the dispatcher looks up on an
# extractor module -- confirm against ..common).
download = acfun_download
# NOTE(review): playlist_not_supported is not defined in this file; it looks
# like it builds a handler that rejects playlist requests for 'acfun'.
download_playlist = playlist_not_supported('acfun')