you-get/src/you_get/extractors/catfun.py

77 lines
2.9 KiB
Python
Raw Normal View History

2014-07-18 04:14:34 +04:00
#!/usr/bin/env python
# Names exported by `from <this module> import *`: only the downloader entry point.
__all__ = ['catfun_download']
2014-07-18 18:28:30 +04:00
from .tudou import tudou_download_by_id
from .sina import sina_download_by_vid
2014-07-18 04:14:34 +04:00
from ..common import *
2014-07-18 18:28:30 +04:00
from xml.dom.minidom import *
2014-07-18 04:14:34 +04:00
def _first_text(node, tag):
    """Return the text of the first ``<tag>`` element found under ``node``."""
    return node.getElementsByTagName(tag)[0].firstChild.nodeValue


def parse_item(item):
    """Resolve one CatFun video item into its downloadable segments.

    ``item`` is a mapping with at least ``"type"`` (one of ``"youku"``,
    ``"qq"`` or ``"sina"``) and ``"vid"`` (string id, appended to the
    CatFun info URL for that source).

    Returns a tuple ``(urls, ext, size)``:

    * ``urls`` -- the text of every ``<durl>/<url>`` element, in order;
    * ``ext``  -- for youku, the text of the first ``<format>`` element;
      for qq/sina, the extension reported by ``url_info`` for the last
      segment (``None`` when the response contains no segments);
    * ``size`` -- the sum of the segment sizes (``<size>`` elements for
      youku, ``url_info`` results for qq/sina).

    Raises ``ValueError`` for an unsupported ``item["type"]``, and lets
    ``xml.parsers.expat.ExpatError`` propagate when the response is not
    parseable XML and no refresh URL can be recovered from it.
    """
    # Imported here so the block does not depend on what the module-level
    # star imports happen to expose.
    from xml.parsers.expat import ExpatError

    kind = item["type"]
    if kind not in ("youku", "qq", "sina"):
        # Used to fall through and return None, which only showed up later
        # as an unpacking TypeError in the caller.
        raise ValueError("unsupported CatFun item type: {!r}".format(kind))

    info_url = ("http://www.catfun.tv/index.php?m=catfun&c=catfun_video"
                "&a=get_{0}_video_info&{0}_id=").format(kind)
    page = get_content(info_url + item["vid"])

    try:
        dom = parseString(page)
    except ExpatError:
        # Only the sina endpoint is retried: it may answer with a refresh
        # page instead of XML, in which case the real URL is embedded in it.
        if kind != "sina":
            raise
        redirect = match1(page, r'url=(.+?)"')
        if not redirect:
            raise
        dom = parseString(get_content(redirect))

    segments = dom.getElementsByTagName("durl")

    if kind == "youku":
        # The youku response carries the container format and per-segment
        # sizes itself, so url_info is not consulted.
        ext = _first_text(dom, "format")
        urls = []
        size = 0
        for segment in segments:
            urls.append(_first_text(segment, "url"))
            size += int(_first_text(segment, "size"))
        return urls, ext, size

    urls = []
    size = 0
    ext = None  # stays None when the response lists no segments
    for segment in segments:
        url = _first_text(segment, "url")
        urls.append(url)
        _, ext, segment_size = url_info(url)
        if kind == "sina" and not ext:
            # sina results reportedly carry no content type, so fall back
            # to the extension that precedes the query string in the URL.
            ext = match1(url, r'\.(\w+?)\?')
        size += segment_size
    return urls, ext, size
2014-07-18 04:14:34 +04:00
def catfun_download(url, output_dir = '.', merge = True, info_only = False):
    """Download (or just describe) every part of a CatFun.tv video page.

    ``url`` is the page URL; the numeric id is taken from its
    ``v<digits>/cat<digits>`` portion and the base title from the page's
    ``<h1 class="title">`` element. ``output_dir`` and ``merge`` are passed
    through to the download helpers. When ``info_only`` is true, only the
    information line is printed for non-tudou parts; for tudou parts the
    flag is forwarded to ``tudou_download_by_id``.

    Returns ``None``.
    """
    title = match1(get_content(url), r'<h1 class="title">(.+?)</h1>')
    vid = match1(url, r"v\d+/cat(\d+)")
    listing_url = "http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}".format(vid)
    parts = json.loads(get_content(listing_url))

    for item in parts:
        # "\u672a\u547d\u540d1" reads "Untitled 1"; apparently the default
        # part name, so it is not appended to the title.
        if item["name"] != "\u672a\u547d\u540d1":
            part_title = title + "-" + item["name"]
        else:
            part_title = title

        if item["type"] == "tudou":
            # Use the per-part title (previously the bare page title), so
            # several tudou parts of one page do not share a single name.
            tudou_download_by_id(item["vid"], part_title, output_dir, merge, info_only)
            continue

        urls, ext, size = parse_item(item)
        # Report the same title that download_urls will use for the file.
        print_info(site_info, part_title, ext, size)
        if not info_only:
            download_urls(urls, part_title, ext, size, output_dir, merge=merge)
2014-07-18 04:14:34 +04:00
2014-07-21 15:41:31 +04:00
# Site label that catfun_download passes to print_info().
site_info = "CatFun.tv"
2014-07-18 04:14:34 +04:00
# Module-level alias for the downloader entry point.
# NOTE(review): presumably the name the extractor loader looks up - confirm.
download = catfun_download
2014-07-21 15:41:31 +04:00
# Playlist entry point: whatever playlist_not_supported('catfun') returns.
# NOTE(review): the helper is not defined in this file (presumably from ..common) - confirm.
download_playlist = playlist_not_supported('catfun')