Mirror of https://github.com/soimort/you-get.git (synced 2025-03-13 11:24:02 +03:00)

Youku: fix #331, refactoring

Commit: 1411c8986e
Parent: 72be4176f9
@@ -16,6 +16,7 @@ from .util import log, sogou_proxy_server, get_filename, unescape_html
 dry_run = False
 force = False
 player = None
+extractor_proxy = None
 sogou_proxy = None
 sogou_env = None
 cookies_txt = None
@@ -824,14 +825,15 @@ def script_main(script_name, download, download_playlist = None):
     -o | --output-dir <PATH>            Set the output directory for downloaded videos.
     -p | --player <PLAYER [options]>    Directly play the video with PLAYER like vlc/smplayer.
     -x | --http-proxy <HOST:PORT>       Use specific HTTP proxy for downloading.
+    -y | --extractor-proxy <HOST:PORT>  Use specific HTTP proxy for extracting stream data.
     --no-proxy                          Don't use any proxy. (ignore $http_proxy)
     -S | --sogou                        Use a Sogou proxy server for downloading.
     --sogou-proxy <HOST:PORT>           Run a standalone Sogou proxy server.
     --debug                             Show traceback on KeyboardInterrupt.
 '''
 
-    short_opts = 'Vhfiuc:nSo:p:x:'
-    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env=']
+    short_opts = 'Vhfiuc:nSo:p:x:y:'
+    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'sogou-proxy=', 'sogou-env=']
     if download_playlist:
         short_opts = 'l' + short_opts
         opts = ['playlist'] + opts
@@ -846,6 +848,7 @@ def script_main(script_name, download, download_playlist = None):
     global force
     global dry_run
     global player
+    global extractor_proxy
     global sogou_proxy
     global sogou_env
     global cookies_txt
@@ -856,6 +859,7 @@ def script_main(script_name, download, download_playlist = None):
     merge = True
     output_dir = '.'
     proxy = None
+    extractor_proxy = None
     traceback = False
     for o, a in opts:
         if o in ('-V', '--version'):
@@ -889,6 +893,8 @@ def script_main(script_name, download, download_playlist = None):
             player = a
         elif o in ('-x', '--http-proxy'):
             proxy = a
+        elif o in ('-y', '--extractor-proxy'):
+            extractor_proxy = a
         elif o in ('-S', '--sogou'):
             sogou_proxy = ("0.0.0.0", 0)
         elif o in ('--sogou-proxy',):
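
Note: the new '-y' / '--extractor-proxy' option rides on the same getopt specification as the existing flags; 'y:' in short_opts and 'extractor-proxy=' in opts both declare an option that takes a HOST:PORT argument. A minimal standalone sketch of that parsing step (the argv value and the URL are made up for illustration; script_main reads sys.argv instead):

    import getopt

    short_opts = 'Vhfiuc:nSo:p:x:y:'
    long_opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge',
                 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=',
                 'extractor-proxy=', 'sogou-proxy=', 'sogou-env=']

    # Hypothetical command line: extract stream data through a local proxy.
    argv = ['-y', '127.0.0.1:8087', 'http://v.youku.com/v_show/id_XXXXXXXXXXX.html']
    options, args = getopt.getopt(argv, short_opts, long_opts)

    extractor_proxy = None
    for o, a in options:
        if o in ('-y', '--extractor-proxy'):
            extractor_proxy = a

    print(extractor_proxy)  # '127.0.0.1:8087'
    print(args)             # remaining positional arguments (the URLs)
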
@@ -924,3 +930,114 @@ def script_main(script_name, download, download_playlist = None):
             raise
         else:
             sys.exit(1)
+
+
+class VideoExtractor():
+    def __init__(self, *args):
+        self.url = None
+        self.title = None
+        self.vid = None
+        self.streams = {}
+        self.streams_sorted = []
+
+        if args:
+            self.url = args[0]
+
+    def download_by_url(self, url, **kwargs):
+        self.url = url
+
+        self.prepare(**kwargs)
+
+        self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
+
+        global extractor_proxy
+        if extractor_proxy:
+            set_proxy(parse_host(extractor_proxy))
+        self.extract(**kwargs)
+        if extractor_proxy:
+            unset_proxy()
+
+        self.download(**kwargs)
+
+    def download_by_vid(self, vid, **kwargs):
+        self.vid = vid
+
+        self.prepare(**kwargs)
+
+        self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
+
+        global extractor_proxy
+        if extractor_proxy:
+            set_proxy(parse_host(extractor_proxy))
+        self.extract(**kwargs)
+        if extractor_proxy:
+            unset_proxy()
+
+        self.download(**kwargs)
+
+    def prepare(self, **kwargs):
+        pass
+        #raise NotImplementedError()
+
+    def extract(self, **kwargs):
+        pass
+        #raise NotImplementedError()
+
+    def p_stream(self, stream_id):
+        stream = self.streams[stream_id]
+        print("    - id: \033[7m%s\033[0m" % stream_id)
+        print("      container: %s" % stream['container'])
+        print("      video-profile: %s" % stream['video_profile'])
+        print("      size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
+        #print("    # download-with: \033[4myou-get --stream=%s\033[0m" % stream_id)
+        print()
+
+    def p(self, stream_id=None):
+        print("site:  %s" % self.__class__.name)
+        print("title: %s" % self.title)
+        if stream_id:
+            # Print the stream
+            print("stream:")
+            self.p_stream(stream_id)
+
+        elif stream_id is None:
+            # Print stream with best quality
+            print("stream: # Best quality")
+            stream_id = self.streams_sorted[0]['id']
+            self.p_stream(stream_id)
+
+        elif stream_id == []:
+            # Print all available streams
+            print("streams: # Available quality and codecs")
+            for stream in self.streams_sorted:
+                self.p_stream(stream['id'])
+
+    def download(self, **kwargs):
+        if 'info_only' in kwargs and kwargs['info_only']:
+            if 'stream_id' in kwargs and kwargs['stream_id']:
+                # Display the stream
+                stream_id = kwargs['stream_id']
+                self.p(stream_id)
+            else:
+                # Display all available streams
+                self.p([])
+        else:
+            if 'stream_id' in kwargs and kwargs['stream_id']:
+                # Download the stream
+                stream_id = kwargs['stream_id']
+            else:
+                # Download stream with the best quality
+                stream_id = self.streams_sorted[0]['id']
+
+            self.p(None)
+
+            urls = self.streams[stream_id]['src']
+            if not urls:
+                log.e('[Failed] Cannot extract video source.')
+                log.e('This is most likely because the video has not been made available in your country.')
+                log.e('You may try to use a proxy via \'-y\' for extracting stream data.')
+                exit(1)
+            download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge'])
+
+        self.__init__()
@@ -7,7 +7,7 @@ from ..common import *
 from .qq import qq_download_by_id
 from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_iid
-from .youku import youku_download_by_id
+from .youku import youku_download_by_vid
 
 import json, re
 
@@ -27,7 +27,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only
     if sourceType == 'sina':
         sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif sourceType == 'youku':
-        youku_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
+        youku_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif sourceType == 'tudou':
         tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif sourceType == 'qq':
@@ -6,7 +6,7 @@ from ..common import *
 
 from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_id
-from .youku import youku_download_by_id
+from .youku import youku_download_by_vid
 
 import re
 
@@ -91,7 +91,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False):
     elif t == 'vid':
         sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'ykid':
-        youku_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        youku_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'uid':
         tudou_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
     else:
@@ -6,7 +6,7 @@ from ..common import *
 
 from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_id
-from .youku import youku_download_by_id
+from .youku import youku_download_by_vid
 
 def miomio_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
@@ -17,7 +17,7 @@ def miomio_download(url, output_dir = '.', merge = True, info_only = False):
     t = r1(r'type=(\w+)', flashvars)
     id = r1(r'vid=([^"]+)', flashvars)
     if t == 'youku':
-        youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
+        youku_download_by_vid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif t == 'tudou':
         tudou_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif t == 'sina':
@@ -46,8 +46,8 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False):
 
     vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
     if vcode:
-        from .youku import youku_download_by_id
-        return youku_download_by_id(vcode, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        from .youku import youku_download_by_vid
+        return youku_download_by_vid(vcode, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
     iid = r1(r'iid\s*[:=]\s*(\d+)', html)
     if not iid:
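
Note: the call-site changes above are a pure rename; youku_download_by_vid is now a bound method exported by a module-level extractor instance instead of a free function. A toy illustration of that export pattern (FakeExtractor and the vid value are stand-ins, not the real Youku class defined below):

    class FakeExtractor:
        def download_by_vid(self, vid, **kwargs):
            # A real extractor would prepare(), extract() and download() here.
            print('would fetch vid %s with options %s' % (vid, kwargs))

    site = FakeExtractor()
    youku_download_by_vid = site.download_by_vid  # the name the other extractors import

    # Hypothetical call, shaped like the ones in miomio.py or tudou.py:
    youku_download_by_vid('XMTIzNDU2Nzg=', output_dir='.', merge=True, info_only=True)
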
@@ -1,222 +1,74 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-__all__ = ['youku_download', 'youku_download_playlist', 'youku_download_by_id']
-
 from ..common import *
 
-import json
-from random import randint
-from time import time
-import re
-import sys
-
-def trim_title(title):
-    title = title.replace(' - 视频 - 优酷视频 - 在线观看', '')
-    title = title.replace(' - 专辑 - 优酷视频', '')
-    title = re.sub(r'—([^—]+)—优酷网,视频高清在线观看', '', title)
-    return title
-
-def find_video_id_from_url(url):
-    patterns = [r'^http://v.youku.com/v_show/id_([\w=]+).htm[l]?',
-                r'^http://player.youku.com/player.php/sid/([\w=]+)/v.swf',
-                r'^loader\.swf\?VideoIDS=([\w=]+)',
-                r'^([\w=]+)$']
-    return r1_of(patterns, url)
-
-def find_video_id_from_show_page(url):
-    return re.search(r'<a class="btnShow btnplay.*href="([^"]+)"', get_html(url)).group(1)
-
-def youku_url(url):
-    id = find_video_id_from_url(url)
-    if id:
-        return 'http://v.youku.com/v_show/id_%s.html' % id
-    if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
-        return find_video_id_from_show_page(url)
-    if re.match(r'http://v.youku.com/v_playlist/\w+.html', url):
-        return url
-    return None
-
-def parse_video_title(url, page):
-    if re.search(r'v_playlist', url):
-        # if we are playing a viedo from play list, the meta title might be incorrect
-        title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^<>]*)</title>'], page)
-    else:
-        title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^-]+)—在线播放.*</title>', r'<meta name="title" content="([^"]*)"'], page)
-    assert title
-    title = trim_title(title)
-    if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
-        title = re.sub(r'^[^-]+-\s*', '', title) # remove the special name from title for playlist video
-        title = re.sub(r'—专辑:.*', '', title) # remove the special name from title for playlist video
-    title = unescape_html(title)
-
-    subtitle = re.search(r'<span class="subtitle" id="subtitle">([^<>]*)</span>', page)
-    if subtitle:
-        subtitle = subtitle.group(1).strip()
-        if subtitle == title:
-            subtitle = None
-    if subtitle:
-        title += '-' + subtitle
-    return title
-
-def parse_playlist_title(url, page):
-    if re.search(r'v_playlist', url):
-        # if we are playing a video from play list, the meta title might be incorrect
-        title = re.search(r'<title>([^<>]*)</title>', page).group(1)
-    else:
-        title = re.search(r'<meta name="title" content="([^"]*)"', page).group(1)
-    title = trim_title(title)
-    if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
-        title = re.sub(r'^[^-]+-\s*', '', title)
-        title = re.sub(r'^.*—专辑:《(.+)》', r'\1', title)
-    title = unescape_html(title)
-    return title
-
-def parse_page(url):
-    url = youku_url(url)
-    page = get_html(url)
-    id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1)
-    title = parse_video_title(url, page)
-    return id2, title
-
-def get_info(videoId2):
-    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2 + '/timezone/+08/version/5/source/out/Sc/2'))
-
-def find_video(info, stream_type = None):
-    #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5)
-    segs = info['data'][0]['segs']
-    types = segs.keys()
-    if not stream_type:
-        for x in ['hd3', 'hd2', 'mp4', 'flv']:
-            if x in types:
-                stream_type = x
-                break
-        else:
-            raise NotImplementedError()
-    assert stream_type in ('hd3', 'hd2', 'mp4', 'flv')
-    file_type = {'hd3': 'flv', 'hd2': 'flv', 'mp4': 'mp4', 'flv': 'flv'}[stream_type]
-
-    seed = info['data'][0]['seed']
-    source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890")
-    mixed = ''
-    while source:
-        seed = (seed * 211 + 30031) & 0xFFFF
-        index = seed * len(source) >> 16
-        c = source.pop(index)
-        mixed += c
-
-    ids = info['data'][0]['streamfileids'][stream_type].split('*')[:-1]
-    vid = ''.join(mixed[int(i)] for i in ids)
-
-    sid = '%s%s%s' % (int(time() * 1000), randint(1000, 1999), randint(1000, 9999))
-
-    urls = []
-    for s in segs[stream_type]:
-        no = '%02x' % int(s['no'])
-        url = 'http://f.youku.com/player/getFlvPath/sid/%s_%s/st/%s/fileid/%s%s%s?K=%s&ts=%s' % (sid, no, file_type, vid[:8], no.upper(), vid[10:], s['k'], s['seconds'])
-        urls.append((url, int(s['size'])))
-    return urls
-
-def file_type_of_url(url):
-    return str(re.search(r'/st/([^/]+)/', url).group(1))
-
-def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    # Open Sogou proxy if required
-    if get_sogou_proxy() is not None:
-        server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w'))
-        server_thread = threading.Thread(target=server.serve_forever)
-        server_thread.daemon = True
-        server_thread.start()
-        set_proxy(server.server_address)
-
-    info = get_info(id)
-
-    # Close Sogou proxy if required
-    if get_sogou_proxy() is not None:
-        server.shutdown()
-        unset_proxy()
-
-    urls, sizes = zip(*find_video(info, stream_type))
-    ext = file_type_of_url(urls[0])
-    total_size = sum(sizes)
-
-    print_info(site_info, title, ext, total_size)
-    if not info_only:
-        download_urls(urls, title, ext, total_size, output_dir, merge = merge)
-
-def parse_playlist_videos(html):
-    return re.findall(r'id="A_(\w+)"', html)
-
-def parse_playlist_pages(html):
-    m = re.search(r'<ul class="pages">.*?</ul>', html, flags = re.S)
-    if m:
-        urls = re.findall(r'href="([^"]+)"', m.group())
-        x1, x2, x3 = re.match(r'^(.*page_)(\d+)(_.*)$', urls[-1]).groups()
-        return ['http://v.youku.com%s%s%s?__rt=1&__ro=listShow' % (x1, i, x3) for i in range(2, int(x2) + 1)]
-    else:
-        return []
-
-def parse_playlist(url):
-    html = get_html(url)
-    video_id = re.search(r"var\s+videoId\s*=\s*'(\d+)'", html).group(1)
-    show_id = re.search(r'var\s+showid\s*=\s*"(\d+)"', html).group(1)
-    list_url = 'http://v.youku.com/v_vpofficiallist/page_1_showid_%s_id_%s.html?__rt=1&__ro=listShow' % (show_id, video_id)
-    html = get_html(list_url)
-    ids = parse_playlist_videos(html)
-    for url in parse_playlist_pages(html):
-        ids.extend(parse_playlist_videos(get_html(url)))
-    return ids
-
-def parse_vplaylist(url):
-    id = r1_of([r'^http://www.youku.com/playlist_show/id_(\d+)(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html',
-                r'^http://v.youku.com/v_playlist/f(\d+)o[01]p\d+.html',
-                r'^http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html'],
-               url)
-    assert id, 'not valid vplaylist url: ' + url
-    url = 'http://www.youku.com/playlist_show/id_%s.html' % id
-    n = int(re.search(r'<span class="num">(\d+)</span>', get_html(url)).group(1))
-    return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)]
-
-def youku_download_playlist(url, output_dir='.', merge=True, info_only=False):
-    """Downloads a Youku playlist.
-    """
-
-    if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url):
-        ids = parse_vplaylist(url)
-    elif re.match(r'http://v.youku.com/v_playlist/f\d+o[01]p\d+.html', url):
-        ids = parse_vplaylist(url)
-    elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url):
-        ids = parse_vplaylist(url)
-    elif re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
-        url = find_video_id_from_show_page(url)
-        assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
-        ids = parse_playlist(url)
-    else:
-        ids = []
-    assert ids != []
-
-    title = parse_playlist_title(url, get_html(url))
-    title = filenameable(title)
-    output_dir = os.path.join(output_dir, title)
-
-    for i, id in enumerate(ids):
-        print('Processing %s of %s videos...' % (i + 1, len(ids)))
-        try:
-            id, title = parse_page(youku_url(id))
-            youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
-        except:
-            continue
-
-def youku_download(url, output_dir='.', merge=True, info_only=False):
-    """Downloads Youku videos by URL.
-    """
-
-    try:
-        youku_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
-    except:
-        id, title = parse_page(url)
-        youku_download_by_id(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
-
-site_info = "Youku.com"
-download = youku_download
-download_playlist = youku_download_playlist
+class Youku(VideoExtractor):
+    name = "优酷 (Youku)"
+
+    stream_types = [
+        {'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
+        {'id': 'hd2', 'container': 'flv', 'video_profile': '超清'},
+        {'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'},
+        {'id': 'flv', 'container': 'flv', 'video_profile': '标清'},
+        {'id': '3gphd', 'container': '3gp', 'video_profile': '高清(3GP)'},
+    ]
+
+    def __init__(self, *args):
+        super().__init__(args)
+
+    def get_vid_from_url(url):
+        """Extracts video ID from URL.
+        """
+        patterns = [
+            'youku.com/v_show/id_([\w=]+)',
+            'player.youku.com/player.php/sid/([\w=]+)/v.swf',
+            'loader\.swf\?VideoIDS=([\w=]+)',
+        ]
+        matches = match1(url, *patterns)
+        if matches:
+            return matches[0]
+        else:
+            return None
+
+    def parse_m3u8(m3u8):
+        return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8)
+
+    def prepare(self, **kwargs):
+        assert self.url or self.vid
+        if self.url and not self.vid:
+            self.vid = __class__.get_vid_from_url(self.url)
+
+        meta = json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/%s' % self.vid))
+        metadata0 = meta['data'][0]
+
+        self.title = metadata0['title']
+
+        for stream_type in self.stream_types:
+            if stream_type['id'] in metadata0['streamsizes']:
+                stream_id = stream_type['id']
+                stream_size = int(metadata0['streamsizes'][stream_id])
+                self.streams[stream_id] = {'container': stream_type['container'], 'video_profile': stream_type['video_profile'], 'size': stream_size}
+
+    def extract(self, **kwargs):
+        if 'stream_id' in kwargs and kwargs['stream_id']:
+            # Extract the stream
+            stream_id = kwargs['stream_id']
+        else:
+            # Extract stream with the best quality
+            stream_id = self.streams_sorted[0]['id']
+
+        m3u8_url = "http://v.youku.com/player/getM3U8/vid/{vid}/type/{stream_id}/video.m3u8".format(vid=self.vid, stream_id=stream_id)
+        m3u8 = get_html(m3u8_url)
+        if not m3u8:
+            log.w('[Warning] This video can only be streamed within Mainland China!')
+            log.w('Use \'-y\' to specify a proxy server for extracting stream data.\n')
+
+        self.streams[stream_id]['src'] = __class__.parse_m3u8(m3u8)
+
+site = Youku()
+download = site.download_by_url
+download_playlist = playlist_not_supported('youku')
+
+youku_download_by_vid = site.download_by_vid
+# Used by: acfun.py bilibili.py miomio.py tudou.py
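
Note: a rough usage sketch of the refactored module, assuming the names defined in the new youku.py above are importable (for example via from .youku import youku_download_by_vid, Youku inside the package); the vid and URL are placeholders, not real videos, and an actual run would hit the Youku API:

    # Print metadata and available streams only (the info-only mode):
    youku_download_by_vid('XMTIzNDU2Nzg=', info_only=True, output_dir='.', merge=True)

    # Or drive the extractor directly by URL:
    site = Youku()
    site.download_by_url('http://v.youku.com/v_show/id_XMTIzNDU2Nzg=.html',
                         output_dir='.', merge=True, info_only=True)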