Catfun: reformat

This commit is contained in:
Mort Yao 2014-07-21 13:41:31 +02:00
parent f78e4c8078
commit 07a224ed7c

View File

@ -8,73 +8,69 @@ from ..common import *
from xml.dom.minidom import * from xml.dom.minidom import *
def parse_item(item):
    """Resolve one CatFun playlist item to its downloadable streams.

    item is a dict with at least "type" (hosting site: "youku", "qq"
    or "sina") and "vid" (the host-side video id).

    Returns a tuple (urls, ext, size): the list of segment URLs, the
    container extension, and the total size in bytes.  Falls through
    (returns None) for unrecognized item types.
    """
    if item["type"] == "youku":
        # CatFun proxies Youku's segment list as XML: a <format> node,
        # then one <durl> per segment with <url> and <size> children.
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id=" + item["vid"])
        dom = parseString(page)
        ext = dom.getElementsByTagName("format")[0].firstChild.nodeValue
        size = 0
        urls = []
        for durl in dom.getElementsByTagName("durl"):
            urls.append(durl.getElementsByTagName("url")[0].firstChild.nodeValue)
            size += int(durl.getElementsByTagName("size")[0].firstChild.nodeValue)
        return urls, ext, size

    elif item["type"] == "qq":
        # QQ's XML carries no per-segment size, so probe each URL with
        # url_info() to learn its extension and byte size.
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id=" + item["vid"])
        dom = parseString(page)
        size = 0
        urls = []
        for durl in dom.getElementsByTagName("durl"):
            url = durl.getElementsByTagName("url")[0].firstChild.nodeValue
            urls.append(url)
            _, ext, _size = url_info(url)
            size += _size
        return urls, ext, size

    elif item["type"] == "sina":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"])
        try:
            dom = parseString(page)
        except Exception:
            # Not XML: the server answered with a refresh page; follow
            # the embedded url= redirect and parse the real response.
            page = get_content(match1(page, r'url=(.+?)"'))
            dom = parseString(page)
        size = 0
        urls = []
        for durl in dom.getElementsByTagName("durl"):
            url = durl.getElementsByTagName("url")[0].firstChild.nodeValue
            urls.append(url)
            _, ext, _size = url_info(url)
            if not ext:
                # Sina's response lacks a content-type; fall back to the
                # extension embedded in the URL path (".ext?query").
                ext = match1(url, r'\.(\w+?)\?')
            size += _size
        return urls, ext, size
def catfun_download(url, output_dir = '.', merge = True, info_only = False):
    """Download every video part of a catfun.tv page.

    url        -- page URL of the form http://www.catfun.tv/v.../cat<id>
    output_dir -- directory to save downloaded files into
    merge      -- merge multi-segment videos after downloading
    info_only  -- print stream information only; skip the download
    """
    title = match1(get_content(url), r'<h1 class="title">(.+?)</h1>')
    vid = match1(url, r"v\d+/cat(\d+)")
    # The site's AJAX endpoint lists every part of the video, each with
    # the hosting site ("type") and the host-side id ("vid").
    j = json.loads(get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}".format(vid)))
    for item in j:
        # "\u672a\u547d\u540d1" is the site's placeholder for an unnamed
        # part; only append the part name when one was actually given.
        if item["name"] != "\u672a\u547d\u540d1":
            t = title + "-" + item["name"]
        else:
            t = title
        if item["type"] == "tudou":
            # Fix: pass the per-part title t (not the page title), so
            # parts of a multi-part video do not overwrite each other.
            tudou_download_by_id(item["vid"], t, output_dir, merge, info_only)
        else:
            urls, ext, size = parse_item(item)
            # Fix: report the per-part title for consistency with the
            # filename actually written by download_urls below.
            print_info(site_info, t, ext, size)
            if not info_only:
                download_urls(urls, t, ext, size, output_dir, merge=merge)
# Extractor registration: the module-level names you-get's dispatcher
# expects from every site module.
site_info = "CatFun.tv"
download = catfun_download
download_playlist = playlist_not_supported('catfun')