Support full-length Youku videos

This commit is contained in:
lh 2015-11-30 16:11:50 +08:00
parent d42950f63d
commit d27d2f876d
3 changed files with 75 additions and 6042 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,149 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'johnx'
__date__ = '6/18/14 10:56 AM'
import time
import urllib
import base64
import pdb
#import requests
def wget(url, **kwargs):
    """Fetch *url* with an HTTP GET and return the raw response body.

    Caller-supplied ``headers`` are layered on top of a copy of
    ``DEFAULT_HEADERS``; ``timeout`` defaults to 30 when not given.
    All remaining keyword arguments are forwarded to ``requests.get``.

    NOTE(review): neither ``requests`` (its import is commented out at
    the top of this file) nor ``DEFAULT_HEADERS`` is defined in the
    visible source, so this raises NameError unless they are provided
    elsewhere.
    """
    if 'timeout' not in kwargs:
        kwargs['timeout'] = 30
    merged_headers = DEFAULT_HEADERS.copy()
    merged_headers.update(kwargs.get('headers', {}))
    kwargs['headers'] = merged_headers
    response = requests.get(url, **kwargs)
    return response.content
def wget2(url, type_=None, **kwargs):
    """Fetch *url* via ``wget`` and optionally decode the body as JSON.

    :param url: address to download.
    :param type_: pass ``'json'`` to get the parsed document; any other
        value returns the raw body unchanged.
    :param kwargs: forwarded to ``json.loads`` (not to ``wget``) when
        ``type_ == 'json'``; ignored otherwise.
    :return: the parsed JSON object, or the raw body from ``wget``.
    """
    # The module never imports json at file level, so import it here to
    # keep the 'json' branch from failing with NameError.
    import json

    content = wget(url)
    if type_ == 'json':
        return json.loads(content, **kwargs)
    return content
def trans_e(a, c):
    """Apply the RC4 stream cipher to *c* using key *a*.

    The operation is symmetric: running it twice with the same key gives
    back the original data.

    :param a: key as a non-empty text string.
    :param c: data to transform; a text string, or a bytes-like sequence
        (whose items are ints on Python 3 and 1-char strings on Python 2).
    :return: a string with one character (code point 0-255) per input item.
    :raises ZeroDivisionError: if *a* is empty.
    """
    # list(...) so the permutation table is mutable on Python 3 as well,
    # where range() no longer returns a list.
    b = list(range(256))

    # Key scheduling: permute the table under control of the key.
    f = 0
    for h in range(256):
        f = (f + b[h] + ord(a[h % len(a)])) % 256
        b[h], b[f] = b[f], b[h]

    # Keystream generation, XOR-ed item by item into the data.
    out = []
    f = h = 0
    for item in c:
        h = (h + 1) % 256
        f = (f + b[h]) % 256
        b[h], b[f] = b[f], b[h]
        # Iterating bytes yields ints on Python 3; str yields characters.
        value = item if isinstance(item, int) else ord(item)
        out.append(chr(value ^ b[(b[h] + b[f]) % 256]))
    return ''.join(out)
def trans_f(a, c):
    """Translate each symbol of *a* through the lookup table *c*.

    Each symbol is first mapped to a number: ``'a'``-``'z'`` become
    0-25 and digit symbols become 26-35.  That number is then replaced
    by the position at which it occurs among the first 36 entries of
    *c* (it is left unchanged when it does not occur).  Finally the
    number is turned back into a symbol: 0-25 into ``'a'``-``'z'``,
    anything above 25 into the decimal text of ``number - 26``.

    :param a: string or list of single-symbol strings to translate.
    :param c: indexable table of ints (a permutation of 0-35 in the
        commented-out examples in this file).
    :return: the translated symbols joined into one string.
    :raises ValueError: if a non-letter symbol is not a valid integer.
    """
    translated = []
    # Never look past 36 entries (the original bound), but also never
    # past the end of a shorter table.
    scan_limit = min(36, len(c))
    for symbol in a:
        if "a" <= symbol <= "z":
            value = ord(symbol[0]) - 97
        else:
            value = int(symbol) + 26
        for position in range(scan_limit):
            if c[position] == value:
                value = position
                break
        decoded = value - 26 if value > 25 else chr(value + 97)
        translated.append(str(decoded))
    return ''.join(translated)
# array_1 = [
# 19, 1, 4, 7, 30, 14, 28, 8, 24, 17, 6, 35,
# 34, 16, 9, 10, 13, 22, 32, 29, 31, 21, 18,
# 3, 2, 23, 25, 27, 11, 20, 5, 15, 12, 0, 33, 26
# ]
# array_2 = [
# 19, 1, 4, 7, 30, 14, 28, 8, 24, 17,
# 6, 35, 34, 16, 9, 10, 13, 22, 32, 29,
# 31, 21, 18, 3, 2, 23, 25, 27, 11, 20,
# 5, 15, 12, 0, 33, 26
# ]
# code_1 = 'b4eto0b4'
# code_2 = 'boa4poz1'
# f_code_1 = trans_f(code_1, array_1)
# f_code_2 = trans_f(code_2, array_2)
# Cipher keys passed to trans_e by calc_ep2: f_code_1 is used on the
# base64-decoded `ep` value, f_code_2 on the rebuilt "sid_vid_token" string.
# The commented-out snippet above shows them being derived with trans_f.
f_code_1 = 'becaf9be'
f_code_2 = 'bf7e5f01'
# print `trans_e(f_code_1, trans_na('NgXQTQ0fJr7d0vHA8OJxA4nz6xJs1wnJXx8='))`
def parse(seed):
    """Yield the 68-character alphabet in a seed-dependent shuffled order.

    A linear congruential step (``seed * 211 + 30031 mod 65536``) picks
    one of the remaining characters on every iteration; the chosen
    character is yielded and removed, so each character appears exactly
    once.

    :param seed: initial generator state; anything ``float()`` accepts.
    :return: generator of single-character strings.
    """
    # The backslash is written as "\\": the original "\:" is an invalid
    # escape sequence that only happens to produce the same two characters.
    remaining = (
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890"
    )
    # Force float arithmetic so the division below is never truncated.
    seed = float(seed)
    while remaining:
        seed = (seed * 211 + 30031) % 65536
        idx = int(seed / 65536 * len(remaining))
        yield remaining[idx]
        remaining = remaining[:idx] + remaining[idx + 1:]
def parse2(file_id, seed):
    """Decode a ``*``-separated list of indices into the real file id.

    Every index in *file_id* selects one character from the shuffled
    alphabet that ``parse(seed)`` produces.

    :param file_id: indices separated by ``*`` (e.g. ``"12*3*40*"``); a
        trailing ``*`` is accepted but not required.
    :param seed: seed handed to ``parse``.
    :return: the decoded file id string.
    :raises ValueError: if a field is not an integer.
    :raises IndexError: if an index is outside the alphabet.
    """
    mix = ''.join(parse(seed))
    # Skip empty fields instead of blindly chopping the last character,
    # so an id without the trailing '*' keeps its final index.
    return ''.join(mix[int(idx)] for idx in file_id.split('*') if idx)
def calc_ep2(vid, ep):
    """Derive the ``ep`` request parameter for a video.

    The base64-encoded *ep* is decoded and run through ``trans_e`` with
    ``f_code_1``; the result is split on ``_`` into a session id and a
    token.  The string ``"<sid>_<vid>_<token>"`` is then run through
    ``trans_e`` with ``f_code_2`` and base64-encoded.

    :param vid: video id inserted between session id and token.
    :param ep: base64-encoded value to decode.
    :return: tuple ``(new_ep, token, sid)``.
    :raises ValueError: if the decoded value does not contain exactly
        one ``_``.
    """
    decoded = trans_e(f_code_1, base64.b64decode(ep))
    sid, token = decoded.split('_')
    plain = '%s_%s_%s' % (sid, vid, token)
    encoded = base64.b64encode(trans_e(f_code_2, plain))
    return encoded, token, sid
def test2(evid):
    """Build the m3u8 playlist URL for the ``mp4`` stream of a video.

    Downloads the play-list metadata for *evid*, decodes the ``mp4``
    stream file id when it is in ``*``-separated form, derives the
    ``ep``/``token``/``sid`` parameters with ``calc_ep2`` and assembles
    the playlist request URL.

    :param evid: encoded video id as used in the play-list API URL.
    :return: the playlist URL, or ``None`` when the metadata lists no
        ``mp4`` stream.
    """
    # The leftover pdb.set_trace() breakpoint was removed: it halted
    # every call in the debugger.
    base_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1'
    # Named `meta` rather than `json` so the json module name is not shadowed.
    meta = wget2(base_url % evid, 'json')
    data = meta['data'][0]
    file_ids = data['streamfileids']
    seed = data['seed']
    video_id = data['videoid']

    url = None
    for type_, file_id in file_ids.items():
        if type_ != 'mp4':
            continue

        if '*' in file_id:
            # Store the decoded id back into the metadata, as before.
            file_id = file_ids[type_] = parse2(file_id, seed)

        new_ep, token, sid = calc_ep2(video_id, data['ep'])

        query = urllib.urlencode(dict(
            vid=video_id, ts=int(time.time()), keyframe=1, type=type_,
            ep=new_ep, oip=data['ip'], ctype=12, ev=1, token=token, sid=sid,
        ))
        url = 'http://pl.youku.com/playlist/m3u8?' + query
    # Returned instead of being discarded; the return value was None before.
    return url
# Module-level call: runs test2 for a sample video id every time this
# file is executed or imported.
test2('XNzI2MjY2MTAw')

View File

@ -6,27 +6,16 @@ from ..extractor import VideoExtractor
import base64 import base64
import time import time
<<<<<<< HEAD import traceback
import urllib.parse import urllib.parse
import math import math
import pdb import pdb
=======
import traceback
>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8
class Youku(VideoExtractor): class Youku(VideoExtractor):
name = "优酷 (Youku)" name = "优酷 (Youku)"
# Last updated: 2015-11-24 # Last updated: 2015-11-24
stream_types = [ stream_types = [
<<<<<<< HEAD
{'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
{'id': 'hd2', 'container': 'flv', 'video_profile': '超清'},
{'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'},
{'id': 'flvhd', 'container': 'flv', 'video_profile': '高清'},
{'id': 'flv', 'container': 'flv', 'video_profile': '标清'},
{'id': '3gphd', 'container': 'mp4', 'video_profile': '高清3GP'},
=======
{'id': 'mp4hd3', 'alias-of' : 'hd3'}, {'id': 'mp4hd3', 'alias-of' : 'hd3'},
{'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'}, {'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
{'id': 'mp4hd2', 'alias-of' : 'hd2'}, {'id': 'mp4hd2', 'alias-of' : 'hd2'},
@ -35,85 +24,10 @@ class Youku(VideoExtractor):
{'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'}, {'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'},
{'id': 'flvhd', 'container': 'flv', 'video_profile': '标清'}, {'id': 'flvhd', 'container': 'flv', 'video_profile': '标清'},
{'id': 'flv', 'container': 'flv', 'video_profile': '标清'}, {'id': 'flv', 'container': 'flv', 'video_profile': '标清'},
{'id': '3gphd', 'container': '3gp', 'video_profile': '标清3GP'}, {'id': '3gphd', 'container': 'mp4', 'video_profile': '标清3GP'},
>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8
] ]
#{'id': '3gphd', 'container': '3gp', 'video_profile': '高清3GP'},
def trans_e(a, c):
    """Run the RC4 stream cipher over *c* with key *a*.

    *c* may be a text string or a bytes-like object; items that are
    already ints are used directly, anything else goes through ``ord``.
    Returns a ``str`` with one character (code point 0-255) per item.
    """
    state = list(range(256))

    # Key scheduling pass.
    j = 0
    for i in range(256):
        j = (j + state[i] + ord(a[i % len(a)])) % 256
        state[i], state[j] = state[j], state[i]

    # Keystream pass: XOR each input item with the next keystream byte.
    pieces = []
    i = j = 0
    for unit in c:
        i = (i + 1) % 256
        j = (j + state[i]) % 256
        state[i], state[j] = state[j], state[i]
        code = unit if isinstance(unit, int) else ord(unit)
        pieces.append(chr(code ^ state[(state[i] + state[j]) % 256]))
    return ''.join(pieces)
def generate_ep_prepare(streamfileids, seed, ep):
    """Compute the per-video values needed before building segment URLs.

    Intended to be executed once per video.

    :param streamfileids: ``*``-separated indices into the seed-shuffled
        alphabet (e.g. ``"12*3*40*"``); a trailing ``*`` is accepted but
        not required.
    :param seed: integer seed for the alphabet shuffle.
    :param ep: base64 text that is decoded and run through the cipher
        with key ``'becaf9be'``; the result must be ``"<sid>_<token>"``.
    :return: tuple ``(fileId0, sid, token)``.
    :raises ValueError: if the decoded *ep* does not contain exactly one
        ``_`` or an index field is not an integer.
    """
    f_code_1 = 'becaf9be'

    def trans_e(a, c):
        # RC4: key scheduling followed by keystream XOR.
        b = list(range(256))
        f = 0
        for h in range(256):
            f = (f + b[h] + ord(a[h % len(a)])) % 256
            b[h], b[f] = b[f], b[h]
        out = []
        f = h = 0
        for item in c:
            h = (h + 1) % 256
            f = (f + b[h]) % 256
            b[h], b[f] = b[f], b[h]
            # bytes yield ints, str yields characters.
            value = item if isinstance(item, int) else ord(item)
            out.append(chr(value ^ b[(b[h] + b[f]) % 256]))
        return ''.join(out)

    def getFileIdMixed(seed):
        # Draw every character of the alphabet exactly once, in the order
        # dictated by the linear congruential sequence started at `seed`.
        mixed = []
        source = ('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP'
                  'QRSTUVWXYZ/\\:._-1234567890')
        while source:
            seed = (seed * 211 + 30031) % 65536
            index = math.floor(seed / 65536 * len(source))
            mixed.append(source[index])
            # Remove exactly the drawn character by position.
            source = source[:index] + source[index + 1:]
        return mixed

    def getFileId(fileId, seed):
        mixed = getFileIdMixed(seed)
        # Skip empty fields rather than always discarding the last one, so
        # an id without the trailing '*' is not silently truncated.
        return ''.join(mixed[int(idx)] for idx in fileId.split('*') if idx)

    e_code = trans_e(f_code_1, base64.b64decode(bytes(ep, 'ascii')))
    sid, token = e_code.split('_')
    fileId0 = getFileId(streamfileids, seed)
    return fileId0, sid, token
def generate_ep(no,fileId0,sid,token):
def trans_e(a, c): def trans_e(a, c):
f = h = 0 f = h = 0
b = list(range(256)) b = list(range(256))
@ -135,18 +49,41 @@ class Youku(VideoExtractor):
return result return result
def generate_ep(no,streamfileids,sid,token):
f_code_2 = 'bf7e5f01' f_code_2 = 'bf7e5f01'
def trans_e(a, c):
f = h = 0
b = list(range(256))
result = ''
while h < 256:
f = (f + b[h] + ord(a[h % len(a)])) % 256
b[h], b[f] = b[f], b[h]
h += 1
q = f = h = 0
while q < len(c):
h = (h + 1) % 256
f = (f + b[h]) % 256
b[h], b[f] = b[f], b[h]
if isinstance(c[q], int):
result += chr(c[q] ^ b[(b[h] + b[f]) % 256])
else:
result += chr(ord(c[q]) ^ b[(b[h] + b[f]) % 256])
q += 1
return result
number = hex(int(str(no),10))[2:].upper() number = hex(int(str(no),10))[2:].upper()
if len(number) == 1: if len(number) == 1:
number = '0' + number number = '0' + number
fileId = fileId0[0:8] + number + fileId0[10:] fileId = streamfileids[0:8] + number + streamfileids[10:]
ep = urllib.parse.quote(base64.b64encode(''.join(trans_e(f_code_2,sid+'_'+fileId+'_'+token)).encode('latin1')),safe='~()*!.\'') ep = urllib.parse.quote(base64.b64encode(''.join(trans_e(f_code_2,sid+'_'+fileId+'_'+token)).encode('latin1')),safe='~()*!.\'')
return fileId,ep return fileId,ep
def parse_m3u8(m3u8): def parse_m3u8(m3u8):
return re.findall('(http://[^\r]+)\r',m3u8) return re.findall('(http://[^\r]+)\r',m3u8)
# return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8)
def get_vid_from_url(url): def get_vid_from_url(url):
"""Extracts video ID from URL. """Extracts video ID from URL.
@ -193,6 +130,7 @@ class Youku(VideoExtractor):
traceback.print_exception(exc_type, exc_value, exc_traceback) traceback.print_exception(exc_type, exc_value, exc_traceback)
def prepare(self, **kwargs): def prepare(self, **kwargs):
self.streams_parameter = {}
assert self.url or self.vid assert self.url or self.vid
if self.url and not self.vid: if self.url and not self.vid:
@ -202,44 +140,13 @@ class Youku(VideoExtractor):
self.download_playlist_by_url(self.url, **kwargs) self.download_playlist_by_url(self.url, **kwargs)
exit(0) exit(0)
<<<<<<< HEAD
meta = json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % self.vid))
if not meta['data']:
log.wtf('[Failed] Video not found.')
metadata0 = meta['data'][0]
if 'error_code' in metadata0 and metadata0['error_code']:
if metadata0['error_code'] == -6:
log.w('[Warning] This video is password protected.')
self.password_protected = True
password = input(log.sprint('Password: ', log.YELLOW))
meta = json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1/password/' % self.vid + password))
if not meta['data']:
log.wtf('[Failed] Video not found.')
metadata0 = meta['data'][0]
if 'error_code' in metadata0 and metadata0['error_code']:
if metadata0['error_code'] == -8 or metadata0['error_code'] == -26:
log.w('[Warning] This video can only be streamed within Mainland China!')
log.w('Use \'-y\' to specify a proxy server for extracting stream data.\n')
else:
log.w(metadata0['error'])
self.title = metadata0['title']
self.metadata = metadata0
self.ep = metadata0['ep']
self.ip = metadata0['ip']
##
self.seed = metadata0['seed']
##
if 'dvd' in metadata0 and 'audiolang' in metadata0['dvd']:
self.audiolang = metadata0['dvd']['audiolang']
=======
api_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % self.vid api_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % self.vid
api_url1 = 'http://play.youku.com/play/get.json?vid=%s&ct=10' % self.vid
try: try:
meta = json.loads(get_html(api_url)) meta = json.loads(get_html(api_url))
meta1 = json.loads(get_html(api_url1))
data = meta['data'] data = meta['data']
data1 = meta1['data']
assert 'stream' in data assert 'stream' in data
except: except:
if 'error' in data: if 'error' in data:
@ -248,7 +155,10 @@ class Youku(VideoExtractor):
self.password_protected = True self.password_protected = True
self.password = input(log.sprint('Password: ', log.YELLOW)) self.password = input(log.sprint('Password: ', log.YELLOW))
api_url += '&pwd={}'.format(self.password) api_url += '&pwd={}'.format(self.password)
api_url1 += '&pwd={}'.format(self.password)
meta1 = json.loads(get_html(api_url1))
meta = json.loads(get_html(api_url)) meta = json.loads(get_html(api_url))
data1 = meta1['data']
data = meta['data'] data = meta['data']
else: else:
log.wtf('[Failed] ' + data['error']['note']) log.wtf('[Failed] ' + data['error']['note'])
@ -260,6 +170,18 @@ class Youku(VideoExtractor):
self.ip = data['security']['ip'] self.ip = data['security']['ip']
stream_types = dict([(i['id'], i) for i in self.stream_types]) stream_types = dict([(i['id'], i) for i in self.stream_types])
for stream in data1['stream']:
stream_id = stream['stream_type']
if stream_id in stream_types:
if 'alias-of' in stream_types[stream_id]:
stream_id = stream_types[stream_id]['alias-of']
if stream_id not in self.streams_parameter:
self.streams_parameter[stream_id] = {
'fileid': stream['stream_fileid'],
'segs': stream['segs']
}
for stream in data['stream']: for stream in data['stream']:
stream_id = stream['stream_type'] stream_id = stream['stream_type']
if stream_id in stream_types: if stream_id in stream_types:
@ -270,11 +192,15 @@ class Youku(VideoExtractor):
'video_profile': stream_types[stream_id]['video_profile'], 'video_profile': stream_types[stream_id]['video_profile'],
'size': stream['size'] 'size': stream['size']
} }
if stream_id not in self.streams_parameter:
self.streams_parameter[stream_id] = {
'fileid': stream['stream_fileid'],
'segs': stream['segs']
}
# Audio languages # Audio languages
if 'dvd' in data and 'audiolang' in data['dvd']: if 'dvd' in data and 'audiolang' in data['dvd']:
self.audiolang = data['dvd']['audiolang'] self.audiolang = data['dvd']['audiolang']
>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8
for i in self.audiolang: for i in self.audiolang:
i['url'] = 'http://v.youku.com/v_show/id_{}'.format(i['vid']) i['url'] = 'http://v.youku.com/v_show/id_{}'.format(i['vid'])
@ -291,22 +217,23 @@ class Youku(VideoExtractor):
# Extract stream with the best quality # Extract stream with the best quality
stream_id = self.streams_sorted[0]['id'] stream_id = self.streams_sorted[0]['id']
container = self.streams[stream_id]['container'] f_code_1 = 'becaf9be'
self.streamfileids = self.metadata['streamfileids'][stream_id] e_code = self.__class__.trans_e(f_code_1, base64.b64decode(bytes(self.ep, 'ascii')))
sid, token = e_code.split('_')
fileId0,sid,token = self.__class__.generate_ep_prepare(self.streamfileids,self.seed,self.ep)
m3u8 = '' m3u8 = ''
stream_list=self.metadata['segs'][stream_id] segs = self.streams_parameter[stream_id]['segs']
for nu in range(0,len(stream_list)): streamfileid = self.streams_parameter[stream_id]['fileid']
k = stream_list[nu]['k'] for no in range(0,len(segs)):
k = segs[no]['key']
if k == -1: if k == -1:
log.e('Error') log.e('Error')
exit() exit()
no = stream_list[nu]['no'] fileId,ep = self.__class__.generate_ep(no,streamfileid ,sid,token)
fileId,ep = self.__class__.generate_ep(no,fileId0,sid,token) # pdb.set_trace()
#pdb.set_trace()
m3u8 += 'http://k.youku.com/player/getFlvPath/sid/'+ sid m3u8 += 'http://k.youku.com/player/getFlvPath/sid/'+ sid
m3u8+='_00/st/'+ container m3u8+='_00/st/'+ self.streams[stream_id]['container']
m3u8+='/fileid/'+ fileId m3u8+='/fileid/'+ fileId
m3u8+='?K='+ k m3u8+='?K='+ k
m3u8+='&ctype=12&ev=1&token='+ token m3u8+='&ctype=12&ev=1&token='+ token
@ -314,18 +241,21 @@ class Youku(VideoExtractor):
m3u8+='&ep='+ ep+'\r\n' m3u8+='&ep='+ ep+'\r\n'
if not kwargs['info_only']: if not kwargs['info_only']:
<<<<<<< HEAD
=======
if self.password_protected:
m3u8_url += '&password={}'.format(self.password)
m3u8 = get_html(m3u8_url)
>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8
self.streams[stream_id]['src'] = self.__class__.parse_m3u8(m3u8) self.streams[stream_id]['src'] = self.__class__.parse_m3u8(m3u8)
if not self.streams[stream_id]['src'] and self.password_protected: if not self.streams[stream_id]['src'] and self.password_protected:
log.e('[Failed] Wrong password.') log.e('[Failed] Wrong password.')
# if not kwargs['info_only']:
# if self.password_protected:
# m3u8_url += '&password={}'.format(self.password)
#
# m3u8 = get_html(m3u8_url)
#
# self.streams[stream_id]['src'] = self.__class__.parse_m3u8(m3u8)
# if not self.streams[stream_id]['src'] and self.password_protected:
# log.e('[Failed] Wrong password.')
site = Youku() site = Youku()
download = site.download_by_url download = site.download_by_url
download_playlist = site.download_playlist_by_url download_playlist = site.download_playlist_by_url