def _first_text(node, tag):
    """Return the text of the first ``tag`` element found under ``node``."""
    return node.getElementsByTagName(tag)[0].firstChild.nodeValue


def _probe_segments(dom, guess_ext_from_url=False):
    """Collect every ``<durl>`` URL in ``dom`` and probe it with ``url_info``.

    Returns ``(urls, ext, size)``: the segment URLs in document order, the
    extension obtained for the last segment, and the sum of the sizes
    reported by ``url_info``.  ``ext`` is ``None`` when the document has no
    ``<durl>`` element (the previous code raised ``UnboundLocalError`` in
    that case).

    When ``guess_ext_from_url`` is true and ``url_info`` yields a falsy
    extension, the extension is taken from the ``.<ext>?`` part of the URL.
    """
    urls = []
    ext = None
    size = 0
    for durl in dom.getElementsByTagName("durl"):
        url = _first_text(durl, "url")
        urls.append(url)
        # Only the 2nd and 3rd items of url_info()'s result are used here.
        _, ext, segment_size = url_info(url)
        if guess_ext_from_url and not ext:
            ext = match1(url, r'\.(\w+?)\?')
        size += segment_size
    return urls, ext, size


def parse_item(item):
    """Resolve one catfun playlist item into its downloadable segments.

    ``item`` is a mapping with a ``"type"`` key (``"youku"``, ``"qq"`` or
    ``"sina"``) and, for those types, a ``"vid"`` string that is appended
    to the catfun.tv info URL.

    Returns a tuple ``(urls, ext, size)`` for a recognised type:

    * youku -- ``ext`` and the per-segment sizes are read from the XML
      returned by catfun.tv (``<format>`` and ``<durl>/<size>``).
    * qq    -- every segment URL is probed with ``url_info``.
    * sina  -- like qq, but the extension falls back to the one embedded
      in the segment URL when ``url_info`` does not provide it.

    Returns ``None`` for any other ``"type"`` (unchanged behaviour).

    Raises ``KeyError`` if ``"type"`` (or ``"vid"`` for a known type) is
    missing, and ``xml.parsers.expat.ExpatError`` if a response that has to
    be XML cannot be parsed.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    from xml.parsers.expat import ExpatError

    kind = item["type"]

    if kind == "youku":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id=" + item["vid"])
        dom = parseString(page)
        ext = _first_text(dom, "format")
        urls = []
        size = 0
        for durl in dom.getElementsByTagName("durl"):
            urls.append(_first_text(durl, "url"))
            size += int(_first_text(durl, "size"))
        return urls, ext, size

    if kind == "qq":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id=" + item["vid"])
        return _probe_segments(parseString(page))

    if kind == "sina":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"])
        try:
            dom = parseString(page)
        except ExpatError:
            # Refresh page encountered: the body is not XML, so follow the
            # url=... target it contains and parse that response instead.
            page = get_content(match1(page, r'url=(.+?)"'))
            dom = parseString(page)
        # sina's result does not contain a content-type, hence the URL-based
        # extension fallback.
        return _probe_segments(dom, guess_ext_from_url=True)

    # Unknown item type: callers get None, exactly as before.
    return None