diff --git a/src/you_get/extractors/catfun.py b/src/you_get/extractors/catfun.py index de8552ba..8b547982 100644 --- a/src/you_get/extractors/catfun.py +++ b/src/you_get/extractors/catfun.py @@ -8,73 +8,69 @@ from ..common import * from xml.dom.minidom import * def parse_item(item): - if item["type"]=="youku": - page=get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id="+item["vid"]) - dom=parseString(page) - ext=dom.getElementsByTagName("format")[0].firstChild.nodeValue; - size=0 - urls=[] + if item["type"] == "youku": + page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id=" + item["vid"]) + dom = parseString(page) + ext = dom.getElementsByTagName("format")[0].firstChild.nodeValue; + size = 0 + urls = [] for i in dom.getElementsByTagName("durl"): urls.append(i.getElementsByTagName("url")[0].firstChild.nodeValue) - size+=int(i.getElementsByTagName("size")[0].firstChild.nodeValue); - return urls,ext,size - pass + size += int(i.getElementsByTagName("size")[0].firstChild.nodeValue); + return urls, ext, size - elif item["type"]=="qq": - page=get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id="+item["vid"]) - dom=parseString(page) - size=0 - urls=[] + elif item["type"] == "qq": + page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id=" + item["vid"]) + dom = parseString(page) + size = 0 + urls = [] for i in dom.getElementsByTagName("durl"): - url=i.getElementsByTagName("url")[0].firstChild.nodeValue - urls.append(url) - vtype,ext,_size=url_info(url) - size+=_size - return urls,ext,size - pass + url = i.getElementsByTagName("url")[0].firstChild.nodeValue + urls.append(url) + vtype, ext, _size = url_info(url) + size += _size + return urls, ext, size - elif item["type"]=="sina": - page=get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"]) + elif item["type"] == "sina": + page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"]) try: - dom=parseString(page) + dom = parseString(page) except: #refresh page encountered - page=get_content(match1(page,r'url=(.+?)"')) - dom=parseString(page) - size=0 - urls=[] + page = get_content(match1(page, r'url=(.+?)"')) + dom = parseString(page) + size = 0 + urls = [] for i in dom.getElementsByTagName("durl"): - url=i.getElementsByTagName("url")[0].firstChild.nodeValue + url = i.getElementsByTagName("url")[0].firstChild.nodeValue urls.append(url) - vtype,ext,_size=url_info(url) + vtype, ext, _size = url_info(url) if not ext: - ext=match1(url,r'\.(\w+?)\?') - size+=_size - #sina's result does not contains content-type - return urls,ext,size - pass - - + ext = match1(url,r'\.(\w+?)\?') + size += _size + #sina's result does not contains content-type + return urls, ext, size def catfun_download(url, output_dir = '.', merge = True, info_only = False): - # html=get_content(url) - title=match1(get_content(url),r'