you-get/src/you_get/extractors/ku6.py

85 lines
2.7 KiB
Python
Raw Normal View History

2012-09-01 19:45:30 +04:00
#!/usr/bin/env python
__all__ = ['ku6_download', 'ku6_download_by_id']
from ..common import *
import json
import re
def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    """Download (or just describe) a Ku6 video identified by its video id.

    The player metadata is fetched from the ``fetchVideo4Player`` endpoint;
    its ``data`` object supplies ``t`` (used as the title when the caller
    gave none) and ``f`` (a comma-separated list of segment URLs).

    Args:
        id: Ku6 video id, interpolated into the metadata URL.
        title: Optional title override; falls back to the metadata's ``t``.
        output_dir: Directory passed through to ``download_urls``.
        merge: Passed through to ``download_urls``.
        info_only: When true, only print the video info and skip downloading.

    Raises:
        AssertionError: if no title is available, or the extension derived
            from the first segment URL is not one of flv / mp4 / f4v.
    """
    response = get_html('http://v.ku6.com/fetchVideo4Player/%s...html' % id)
    data = json.loads(response)['data']
    fallback_title = data['t']
    segment_field = data['f']

    if not title:
        title = fallback_title
    assert title

    urls = segment_field.split(',')

    # The container type is taken from the first segment's file extension.
    ext = match1(urls[0], r'.*\.(\w+)\??[^\.]*')
    assert ext in ('flv', 'mp4', 'f4v'), ext
    if ext == 'f4v':
        # f4v segments are handled as flv from here on.
        ext = 'flv'

    # url_info() yields a 3-tuple; only its last element is accumulated
    # (presumably the segment size in bytes -- confirm against common.py).
    size = sum(length for _, _, length in map(url_info, urls))

    print_info(site_info, title, ext, size)
    if info_only:
        return
    download_urls(urls, title, ext, size, output_dir, merge = merge)
def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Download (or just describe) the Ku6 video behind a page URL.

    Resolution order:

    1. ``baidu.ku6.com/watch/...`` pages are resolved through ``baidu_ku6``.
    2. Otherwise the video id is extracted from the URL itself using the
       known ``v.ku6.com`` / ``my.ku6.com`` URL shapes.
    3. If neither yields an id, the page is fetched and the entry for the
       URL's ``vid=`` query value is looked up in the page's
       ``detailDataMap`` object, e.g.
       http://www.ku6.com/2017/detail-zt.html?vid=xvqTmvZrH8MNvErpvRxFn3

    Args:
        url: Page URL of the video.
        output_dir: Directory passed through to the download helpers.
        merge: Passed through to the download helpers.
        info_only: When true, only print the video info and skip downloading.
        **kwargs: Forwarded to ``download_urls`` on the ``detailDataMap`` path.

    Raises:
        Exception: ``'Unsupported url'`` when the fallback path cannot find
            ``detailDataMap``, a ``vid=`` parameter, the entry for that vid,
            or the entry's ``title`` / ``playUrl`` fields.
    """
    if match1(url, r'http://baidu.ku6.com/watch/(.*)\.html') is not None:
        video_id = baidu_ku6(url)
    else:
        patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',
                    r'http://v.ku6.com/show/(.*)\.\.\.html',
                    r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
        video_id = r1_of(patterns, url)

    if video_id is not None:
        ku6_download_by_id(video_id, output_dir = output_dir, merge = merge, info_only = info_only)
        return

    # Fallback: the id is not in the URL path, so read the metadata that the
    # page embeds as `detailDataMap={...};`.
    page = get_content(url)
    meta = re.search(r'detailDataMap=(\{.+?\});', page)
    if meta is None:
        raise Exception('Unsupported url')

    vid = re.search(r'vid=([^&]+)', url)
    if vid is None:
        raise Exception('Unsupported url')

    # vid comes straight from the URL, so escape it before embedding it in a
    # pattern; otherwise regex metacharacters in it would change the match.
    entry = re.search(re.escape(vid.group(1)) + r':\{(.+?)\}', meta.group(1))
    if entry is None:
        # No metadata for this vid: fail like the other unsupported cases
        # instead of continuing with unbound/None values.
        raise Exception('Unsupported url')

    fields = entry.group(1)
    title_match = re.search(r'title:"(.+?)"', fields)
    play_url_match = re.search(r'playUrl:"(.+?)"', fields)
    if title_match is None or play_url_match is None:
        raise Exception('Unsupported url')

    title = title_match.group(1)
    video_url = play_url_match.group(1)
    video_size = url_size(video_url)

    print_info(site_info, title, 'mp4', video_size)
    if not info_only:
        download_urls([video_url], title, 'mp4', video_size, output_dir, merge=merge, **kwargs)
2016-05-25 10:35:41 +03:00
def baidu_ku6(url):
    """Resolve a baidu.ku6.com watch page to a Ku6 video id.

    The watch page embeds the real player in an ``innerFrame`` iframe; the
    iframe's document is fetched and searched for a ``v.ku6.com/show/`` link.

    Args:
        url: URL of the baidu.ku6.com watch page.

    Returns:
        The video id captured from the show link, or ``None`` when the iframe
        or the link cannot be found.
    """
    outer_html = get_html(url)
    frame_src = match1(outer_html, r'<iframe id="innerFrame" src="([^"]*)"')
    if frame_src is None:
        return None

    inner_html = get_html(frame_src)

    # Fix for issue #1746: the show link normally ends in "...html" (three
    # dots), but some pages use a plain ".html", so try that form second.
    show_link_patterns = (
        r'http://v.ku6.com/show/(.*)\.\.\.html',
        r'http://v.ku6.com/show/(.*)\.html',
    )
    for pattern in show_link_patterns:
        video_id = match1(inner_html, pattern)
        if video_id is not None:
            return video_id
    return None
2012-09-01 19:45:30 +04:00
# Site label passed to print_info() by the download functions in this module.
site_info = "Ku6.com"
# Module-level aliases; presumably the entry points the you-get dispatcher
# looks up on each extractor module -- confirm against common.py.
download = ku6_download
# Playlists are not handled here; playlist_not_supported() supplies the stub.
download_playlist = playlist_not_supported('ku6')