you-get/src/you_get/extractors/catfun.py

#!/usr/bin/env python

__all__ = ['catfun_download']
from .tudou import tudou_download_by_id
from .sina import sina_download_by_vid

from ..common import *
from xml.dom.minidom import *

def parse_item(item):
    """Resolve one CatFun playlist entry into its downloadable segments.

    The entry's ``"type"`` selects which catfun.tv info endpoint is queried.
    The XML answer lists the video segments as ``<durl>`` elements, each
    holding a ``<url>`` child.

    Args:
        item: Mapping with the keys ``"type"`` and ``"vid"``. ``"vid"`` is
            concatenated onto the request URL, so it must be a string.

    Returns:
        A ``(urls, ext, size)`` tuple: the segment URLs in document order,
        the container extension and the summed size of all segments.
        For "qq" and "sina" entries ``ext`` is ``None`` when the response
        contains no ``<durl>`` element. Returns ``None`` when the type is
        not one of "youku", "qq" or "sina".
    """
    kind = item["type"]

    if kind == "youku":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id=" + item["vid"])
        dom = parseString(page)
        # The youku answer states the format and each segment's size itself.
        ext = dom.getElementsByTagName("format")[0].firstChild.nodeValue
        urls = []
        size = 0
        for durl in dom.getElementsByTagName("durl"):
            urls.append(durl.getElementsByTagName("url")[0].firstChild.nodeValue)
            size += int(durl.getElementsByTagName("size")[0].firstChild.nodeValue)
        return urls, ext, size

    if kind == "qq":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id=" + item["vid"])
        dom = parseString(page)
        urls = []
        size = 0
        # Pre-bind ext so an answer without any <durl> cannot leave it
        # undefined at the return statement.
        ext = None
        for durl in dom.getElementsByTagName("durl"):
            url = durl.getElementsByTagName("url")[0].firstChild.nodeValue
            urls.append(url)
            # Extension and size are obtained by probing each segment URL.
            _, ext, part_size = url_info(url)
            size += part_size
        return urls, ext, size

    if kind == "sina":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"])
        try:
            dom = parseString(page)
        except Exception:
            # Refresh page encountered: the body is not XML, so follow the
            # url=... target it carries and parse that answer instead.
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            page = get_content(match1(page, r'url=(.+?)"'))
            dom = parseString(page)
        urls = []
        size = 0
        # Pre-bind ext so an answer without any <durl> cannot leave it
        # undefined at the return statement.
        ext = None
        for durl in dom.getElementsByTagName("durl"):
            url = durl.getElementsByTagName("url")[0].firstChild.nodeValue
            urls.append(url)
            _, ext, part_size = url_info(url)
            if not ext:
                # sina's result does not contain a content-type, so fall
                # back to the extension found in the URL path.
                ext = match1(url, r'\.(\w+?)\?')
            size += part_size
        return urls, ext, size

    # Unrecognised type: nothing to resolve.
    return None

def catfun_download(url, output_dir='.', merge=True, info_only=False):
    """Download every part of a CatFun.tv video page.

    The page title is scraped from the ``<h1 class="title">`` element, the
    numeric video id is taken from the URL, and the part list is fetched
    as JSON from the catfun.tv ``get_video`` endpoint. Each part is then
    either handed to the tudou extractor or resolved via ``parse_item``.

    Args:
        url: Video page URL; must contain ``v<digits>/cat<digits>``.
        output_dir: Directory passed through to the downloaders.
        merge: Passed through to the downloaders.
        info_only: When true, only print the information and skip the
            download of non-tudou parts; forwarded as-is for tudou parts.
    """
    title = match1(get_content(url), r'<h1 class="title">(.+?)</h1>')
    vid = match1(url, r"v\d+/cat(\d+)")
    parts = json.loads(get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}".format(vid)))

    for item in parts:
        # "\u672a\u547d\u540d1" reads "unnamed 1"; such parts keep the bare
        # page title, every other part gets its own name appended.
        if item["name"] != "\u672a\u547d\u540d1":
            part_title = title + "-" + item["name"]
        else:
            part_title = title

        if item["type"] == "tudou":
            # Pass the per-part title so several named tudou parts of one
            # video do not all end up with the same output name.
            tudou_download_by_id(item["vid"], part_title, output_dir, merge, info_only)
            continue

        urls, ext, size = parse_item(item)

        # Report the same title that the file will be saved under.
        print_info(site_info, part_title, ext, size)
        if not info_only:
            download_urls(urls, part_title, ext, size, output_dir, merge=merge)

# Site label; catfun_download passes it to print_info.
site_info = "CatFun.tv"
# Module-level alias for the single-video entry point.
download = catfun_download
# Playlist entry point, built by the playlist_not_supported helper
# (presumably from ..common and presumably a stub that reports playlists
# as unsupported for 'catfun' -- confirm against that helper).
download_playlist = playlist_not_supported('catfun')
add catfun temp 2014-07-18 04:14:34 +04:00			`#!/usr/bin/env python`

			`__all__ = ['catfun_download']`
add support for catfun 2014-07-18 18:28:30 +04:00			`from .tudou import tudou_download_by_id`
			`from .sina import sina_download_by_vid`
add catfun temp 2014-07-18 04:14:34 +04:00
			`from ..common import *`
add support for catfun 2014-07-18 18:28:30 +04:00			`from xml.dom.minidom import *`
add catfun temp 2014-07-18 04:14:34 +04:00
			`def parse_item(item):`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`if item["type"] == "youku":`
			`page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id=" + item["vid"])`
			`dom = parseString(page)`
			`ext = dom.getElementsByTagName("format")[0].firstChild.nodeValue;`
			`size = 0`
			`urls = []`
add support for catfun 2014-07-18 18:28:30 +04:00			`for i in dom.getElementsByTagName("durl"):`
			`urls.append(i.getElementsByTagName("url")[0].firstChild.nodeValue)`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`size += int(i.getElementsByTagName("size")[0].firstChild.nodeValue);`
			`return urls, ext, size`
add catfun temp 2014-07-18 04:14:34 +04:00
Catfun: reformat 2014-07-21 15:41:31 +04:00			`elif item["type"] == "qq":`
			`page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id=" + item["vid"])`
			`dom = parseString(page)`
			`size = 0`
			`urls = []`
add support for catfun 2014-07-18 18:28:30 +04:00			`for i in dom.getElementsByTagName("durl"):`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`url = i.getElementsByTagName("url")[0].firstChild.nodeValue`
			`urls.append(url)`
			`vtype, ext, _size = url_info(url)`
			`size += _size`
			`return urls, ext, size`
add catfun temp 2014-07-18 04:14:34 +04:00
Catfun: reformat 2014-07-21 15:41:31 +04:00			`elif item["type"] == "sina":`
			`page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"])`
add support for catfun 2014-07-18 18:28:30 +04:00			`try:`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`dom = parseString(page)`
add support for catfun 2014-07-18 18:28:30 +04:00			`except:`
			`#refresh page encountered`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`page = get_content(match1(page, r'url=(.+?)"'))`
			`dom = parseString(page)`
			`size = 0`
			`urls = []`
add support for catfun 2014-07-18 18:28:30 +04:00			`for i in dom.getElementsByTagName("durl"):`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`url = i.getElementsByTagName("url")[0].firstChild.nodeValue`
add support for catfun 2014-07-18 18:28:30 +04:00			`urls.append(url)`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`vtype, ext, _size = url_info(url)`
add support for catfun 2014-07-18 18:28:30 +04:00			`if not ext:`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`ext = match1(url,r'\.(\w+?)\?')`
			`size += _size`
			`#sina's result does not contains content-type`
			`return urls, ext, size`
add catfun temp 2014-07-18 04:14:34 +04:00
			`def catfun_download(url, output_dir = '.', merge = True, info_only = False):`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`# html = get_content(url)`
			`title = match1(get_content(url), r'<h1 class="title">(.+?)</h1>')`
			`vid = match1(url, r"v\d+/cat(\d+)")`
			`j = json.loads(get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}".format(vid)))`
add catfun temp 2014-07-18 04:14:34 +04:00			`for item in j:`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`if item["name"] != "\u672a\u547d\u540d1":`
			`t = title + "-" + item["name"]`
add support for catfun 2014-07-18 18:28:30 +04:00			`else:`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`t = title`
			`if item["type"] == "tudou":`
add support for catfun 2014-07-18 18:28:30 +04:00			`tudou_download_by_id(item["vid"], title, output_dir, merge, info_only)`

			`else:`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`urls, ext, size = parse_item(item)`
add catfun temp 2014-07-18 04:14:34 +04:00
Catfun: reformat 2014-07-21 15:41:31 +04:00			`print_info(site_info, title, ext, size)`
			`if not info_only:`
			`download_urls(urls, t, ext, size, output_dir, merge=merge)`
add catfun temp 2014-07-18 04:14:34 +04:00
Catfun: reformat 2014-07-21 15:41:31 +04:00			`site_info = "CatFun.tv"`
add catfun temp 2014-07-18 04:14:34 +04:00			`download = catfun_download`
Catfun: reformat 2014-07-21 15:41:31 +04:00			`download_playlist = playlist_not_supported('catfun')`