From 127318d5d0ec050c48a1bc9f8b40ce55e65bc596 Mon Sep 17 00:00:00 2001 From: lh <5linhua5@gmail.com> Date: Thu, 3 Dec 2015 03:49:07 +0800 Subject: [PATCH] support full length of youku's video --- src/you_get/extractors/youku.py | 120 +++++++++++++++++++++++++------- 1 file changed, 93 insertions(+), 27 deletions(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 006e5a72..ca565b53 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -7,6 +7,9 @@ from ..extractor import VideoExtractor import base64 import time import traceback +import urllib.parse +import math +import pdb class Youku(VideoExtractor): name = "优酷 (Youku)" @@ -21,11 +24,32 @@ class Youku(VideoExtractor): {'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'}, {'id': 'flvhd', 'container': 'flv', 'video_profile': '标清'}, {'id': 'flv', 'container': 'flv', 'video_profile': '标清'}, - {'id': '3gphd', 'container': '3gp', 'video_profile': '标清(3GP)'}, + {'id': '3gphd', 'container': 'mp4', 'video_profile': '标清(3GP)'}, ] - def generate_ep(vid, ep): - f_code_1 = 'becaf9be' + + def trans_e(a, c): + f = h = 0 + b = list(range(256)) + result = '' + while h < 256: + f = (f + b[h] + ord(a[h % len(a)])) % 256 + b[h], b[f] = b[f], b[h] + h += 1 + q = f = h = 0 + while q < len(c): + h = (h + 1) % 256 + f = (f + b[h]) % 256 + b[h], b[f] = b[f], b[h] + if isinstance(c[q], int): + result += chr(c[q] ^ b[(b[h] + b[f]) % 256]) + else: + result += chr(ord(c[q]) ^ b[(b[h] + b[f]) % 256]) + q += 1 + + return result + + def generate_ep(no,streamfileids,sid,token): f_code_2 = 'bf7e5f01' def trans_e(a, c): @@ -49,13 +73,17 @@ class Youku(VideoExtractor): return result - e_code = trans_e(f_code_1, base64.b64decode(bytes(ep, 'ascii'))) - sid, token = e_code.split('_') - new_ep = trans_e(f_code_2, '%s_%s_%s' % (sid, vid, token)) - return base64.b64encode(bytes(new_ep, 'latin')), sid, token + number = hex(int(str(no),10))[2:].upper() + if len(number) == 1: + number = '0' + number + fileId = streamfileids[0:8] + number + streamfileids[10:] + + ep = urllib.parse.quote(base64.b64encode(''.join(trans_e(f_code_2,sid+'_'+fileId+'_'+token)).encode('latin1')),safe='~()*!.\'') + return fileId,ep def parse_m3u8(m3u8): - return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8) + return re.findall('(http://[^\r]+)\r',m3u8) +# return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8) def get_vid_from_url(url): """Extracts video ID from URL. @@ -102,6 +130,7 @@ class Youku(VideoExtractor): traceback.print_exception(exc_type, exc_value, exc_traceback) def prepare(self, **kwargs): + self.streams_parameter = {} assert self.url or self.vid if self.url and not self.vid: @@ -112,9 +141,12 @@ class Youku(VideoExtractor): exit(0) api_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % self.vid + api_url1 = 'http://play.youku.com/play/get.json?vid=%s&ct=10' % self.vid try: meta = json.loads(get_html(api_url)) + meta1 = json.loads(get_html(api_url1)) data = meta['data'] + data1 = meta1['data'] assert 'stream' in data except: if 'error' in data: @@ -123,7 +155,10 @@ class Youku(VideoExtractor): self.password_protected = True self.password = input(log.sprint('Password: ', log.YELLOW)) api_url += '&pwd={}'.format(self.password) + api_url1 += '&pwd={}'.format(self.password) + meta1 = json.loads(get_html(api_url1)) meta = json.loads(get_html(api_url)) + data1 = meta1['data'] data = meta['data'] else: log.wtf('[Failed] ' + data['error']['note']) @@ -135,6 +170,18 @@ class Youku(VideoExtractor): self.ip = data['security']['ip'] stream_types = dict([(i['id'], i) for i in self.stream_types]) + + for stream in data1['stream']: + stream_id = stream['stream_type'] + if stream_id in stream_types: + if 'alias-of' in stream_types[stream_id]: + stream_id = stream_types[stream_id]['alias-of'] + if stream_id not in self.streams_parameter: + self.streams_parameter[stream_id] = { + 'fileid': stream['stream_fileid'], + 'segs': stream['segs'] + } + for stream in data['stream']: stream_id = stream['stream_type'] if stream_id in stream_types: @@ -145,6 +192,11 @@ class Youku(VideoExtractor): 'video_profile': stream_types[stream_id]['video_profile'], 'size': stream['size'] } + if stream_id not in self.streams_parameter: + self.streams_parameter[stream_id] = { + 'fileid': stream['stream_fileid'], + 'segs': stream['segs'] + } # Audio languages if 'dvd' in data and 'audiolang' in data['dvd']: @@ -165,31 +217,45 @@ class Youku(VideoExtractor): # Extract stream with the best quality stream_id = self.streams_sorted[0]['id'] - new_ep, sid, token = self.__class__.generate_ep(self.vid, self.ep) - m3u8_query = parse.urlencode(dict( - ctype=12, - ep=new_ep, - ev=1, - keyframe=1, - oip=self.ip, - sid=sid, - token=token, - ts=int(time.time()), - type=stream_id, - vid=self.vid, - )) - m3u8_url = 'http://pl.youku.com/playlist/m3u8?' + m3u8_query + f_code_1 = 'becaf9be' + e_code = self.__class__.trans_e(f_code_1, base64.b64decode(bytes(self.ep, 'ascii'))) + + sid, token = e_code.split('_') + + m3u8 = '' + segs = self.streams_parameter[stream_id]['segs'] + streamfileid = self.streams_parameter[stream_id]['fileid'] + for no in range(0,len(segs)): + k = segs[no]['key'] + if k == -1: + log.e('Error') + exit() + fileId,ep = self.__class__.generate_ep(no,streamfileid ,sid,token) +# pdb.set_trace() + m3u8 += 'http://k.youku.com/player/getFlvPath/sid/'+ sid + m3u8+='_00/st/'+ self.streams[stream_id]['container'] + m3u8+='/fileid/'+ fileId + m3u8+='?K='+ k + m3u8+='&ctype=12&ev=1&token='+ token + m3u8+='&oip='+ str(self.ip) + m3u8+='&ep='+ ep+'\r\n' if not kwargs['info_only']: - if self.password_protected: - m3u8_url += '&password={}'.format(self.password) - - m3u8 = get_html(m3u8_url) - self.streams[stream_id]['src'] = self.__class__.parse_m3u8(m3u8) if not self.streams[stream_id]['src'] and self.password_protected: log.e('[Failed] Wrong password.') + +# if not kwargs['info_only']: +# if self.password_protected: +# m3u8_url += '&password={}'.format(self.password) +# +# m3u8 = get_html(m3u8_url) +# +# self.streams[stream_id]['src'] = self.__class__.parse_m3u8(m3u8) +# if not self.streams[stream_id]['src'] and self.password_protected: +# log.e('[Failed] Wrong password.') + site = Youku() download = site.download_by_url download_playlist = site.download_playlist_by_url