you-get/src/you_get/extractors/ku6.py

#!/usr/bin/env python

__all__ = ['ku6_download', 'ku6_download_by_id']

from ..common import *

import json
import re

def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    data = json.loads(get_html('http://v.ku6.com/fetchVideo4Player/%s...html' % id))['data']
    t = data['t']
    f = data['f']
    title = title or t
    assert title
    urls = f.split(',')
    ext = match1(urls[0], r'.*\.(\w+)\??[^\.]*')
    assert ext in ('flv', 'mp4', 'f4v'), ext
    ext = {'f4v': 'flv'}.get(ext, ext)
    size = 0
    for url in urls:
        _, _, temp = url_info(url)
        size += temp
    
    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls(urls, title, ext, size, output_dir, merge = merge)

def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    id = None

    if match1(url, r'http://baidu.ku6.com/watch/(.*)\.html') is not None:
        id = baidu_ku6(url)
    else:
        patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',
                r'http://v.ku6.com/show/(.*)\.\.\.html',
                r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
        id = r1_of(patterns, url)

    if id is None:
        # http://www.ku6.com/2017/detail-zt.html?vid=xvqTmvZrH8MNvErpvRxFn3
        page = get_content(url)
        meta = re.search(r'detailDataMap=(\{.+?\});', page)
        if meta is not None:
            meta = meta.group(1)
        else:
            raise Exception('Unsupported url')
        vid = re.search(r'vid=([^&]+)', url)
        if vid is not None:
            vid = vid.group(1)
        else:
            raise Exception('Unsupported url')
        this_meta = re.search(vid+':\{(.+?)\}', meta)
        if this_meta is not None:
            this_meta = this_meta.group(1)
            title = re.search('title:"(.+?)"', this_meta).group(1)
            video_url = re.search('playUrl:"(.+?)"', this_meta).group(1)
        video_size = url_size(video_url)
        print_info(site_info, title, 'mp4', video_size)
        if not info_only:
            download_urls([video_url], title, 'mp4', video_size, output_dir, merge=merge, **kwargs)
        return

    ku6_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)

def baidu_ku6(url):
    id = None

    h1 = get_html(url)
    isrc = match1(h1, r'<iframe id="innerFrame" src="([^"]*)"')

    if isrc is not None:
        h2 = get_html(isrc)
        id = match1(h2, r'http://v.ku6.com/show/(.*)\.\.\.html')
#fix #1746
#some ku6 urls really ends with three dots? A bug?
        if id is None:
            id = match1(h2, r'http://v.ku6.com/show/(.*)\.html')

    return id

site_info = "Ku6.com"
download = ku6_download
download_playlist = playlist_not_supported('ku6')
add support for Ku6 2012-09-01 19:45:30 +04:00			`#!/usr/bin/env python`

			`__all__ = ['ku6_download', 'ku6_download_by_id']`

			`from ..common import *`

			`import json`
			`import re`

			`def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):`
			`data = json.loads(get_html('http://v.ku6.com/fetchVideo4Player/%s...html' % id))['data']`
			`t = data['t']`
			`f = data['f']`
			`title = title or t`
			`assert title`
			`urls = f.split(',')`
[ku6]fix error when parse url's extension 2017-05-07 02:20:00 +03:00			`ext = match1(urls[0], r'.\.(\w+)\??[^\.]')`
add support for Ku6 2012-09-01 19:45:30 +04:00			`assert ext in ('flv', 'mp4', 'f4v'), ext`
			`ext = {'f4v': 'flv'}.get(ext, ext)`
			`size = 0`
			`for url in urls:`
			`_, _, temp = url_info(url)`
			`size += temp`

			`print_info(site_info, title, ext, size)`
			`if not info_only:`
			`download_urls(urls, title, ext, size, output_dir, merge = merge)`

fix extractors not use VideoExtractor after add --json option 2015-09-26 08:45:39 +03:00			`def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):`
Support http://baidu.ku6.com #1163 2016-05-25 10:35:41 +03:00			`id = None`

			`if match1(url, r'http://baidu.ku6.com/watch/(.*)\.html') is not None:`
			`id = baidu_ku6(url)`
			`else:`
			`patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',`
			`r'http://v.ku6.com/show/(.*)\.\.\.html',`
			`r'http://my.ku6.com/watch\?.v=(.)\.\..*']`
			`id = r1_of(patterns, url)`
[ku6]new url pattern 2017-08-29 09:15:53 +03:00
			`if id is None:`
			`# http://www.ku6.com/2017/detail-zt.html?vid=xvqTmvZrH8MNvErpvRxFn3`
			`page = get_content(url)`
			`meta = re.search(r'detailDataMap=(\{.+?\});', page)`
			`if meta is not None:`
			`meta = meta.group(1)`
			`else:`
			`raise Exception('Unsupported url')`
			`vid = re.search(r'vid=([^&]+)', url)`
			`if vid is not None:`
			`vid = vid.group(1)`
			`else:`
			`raise Exception('Unsupported url')`
			`this_meta = re.search(vid+':\{(.+?)\}', meta)`
			`if this_meta is not None:`
			`this_meta = this_meta.group(1)`
			`title = re.search('title:"(.+?)"', this_meta).group(1)`
			`video_url = re.search('playUrl:"(.+?)"', this_meta).group(1)`
			`video_size = url_size(video_url)`
			`print_info(site_info, title, 'mp4', video_size)`
			`if not info_only:`
			`download_urls([video_url], title, 'mp4', video_size, output_dir, merge=merge, **kwargs)`
			`return`

add support for Ku6 2012-09-01 19:45:30 +04:00			`ku6_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)`

Support http://baidu.ku6.com #1163 2016-05-25 10:35:41 +03:00			`def baidu_ku6(url):`
			`id = None`

			`h1 = get_html(url)`
			`isrc = match1(h1, r'<iframe id="innerFrame" src="([^"]*)"')`

			`if isrc is not None:`
			`h2 = get_html(isrc)`
			`id = match1(h2, r'http://v.ku6.com/show/(.*)\.\.\.html')`
[ku6] add baidu.ku6 matching pattern 2017-03-09 15:47:35 +03:00			`#fix #1746`
			`#some ku6 urls really ends with three dots? A bug?`
			`if id is None:`
			`id = match1(h2, r'http://v.ku6.com/show/(.*)\.html')`
Support http://baidu.ku6.com #1163 2016-05-25 10:35:41 +03:00
			`return id`

add support for Ku6 2012-09-01 19:45:30 +04:00			`site_info = "Ku6.com"`
			`download = ku6_download`
			`download_playlist = playlist_not_supported('ku6')`