#!/usr/bin/env python
# -*- coding: utf-8 -*-

from ..common import *
from ..extractor import VideoExtractor

import base64
import time


class Youku(VideoExtractor):
    """Video extractor for Youku, built on ``VideoExtractor``."""

    name = "优酷 (Youku)"

    # Known stream formats: each entry pairs a stream id with its container
    # and a human-readable profile label.
    # NOTE(review): presumably ordered best quality first -- confirm against
    # how VideoExtractor consumes this list.
    stream_types = [
        {'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
        {'id': 'hd2', 'container': 'flv', 'video_profile': '超清'},
        {'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'},
        {'id': 'flvhd', 'container': 'flv', 'video_profile': '高清'},
        {'id': 'flv', 'container': 'flv', 'video_profile': '标清'},
        {'id': '3gphd', 'container': '3gp', 'video_profile': '高清(3GP)'},
    ]

    @staticmethod
    def generate_ep(vid, ep):
        """Derive a new ``ep`` value, session id and token from ``ep``.

        ``ep`` is base64-decoded and run through the stream cipher with the
        first key; the result must have the form ``"<sid>_<token>"``. The
        string ``"<sid>_<vid>_<token>"`` is then run through the cipher with
        the second key and base64-encoded.

        Args:
            vid: Video id inserted between the session id and the token.
            ep: Base64 text (ASCII ``str``) to decode.

        Returns:
            A tuple ``(new_ep, sid, token)`` where ``new_ep`` is ``bytes``
            (base64 output) and ``sid`` / ``token`` are ``str``.

        Raises:
            ValueError: If the decoded ``ep`` does not contain exactly one
                ``'_'`` separator.
        """
        f_code_1 = 'becaf9be'
        f_code_2 = 'bf7e5f01'

        def trans_e(key, data):
            """Apply the keystream derived from ``key`` to ``data``.

            Uses the same key-scheduling and keystream-generation steps as
            RC4. ``data`` may be ``bytes`` (items are ints) or ``str``
            (items are characters); the result is always a ``str`` with one
            character per input item.
            """
            # Key scheduling: permute the 256-entry state using the key.
            state = list(range(256))
            j = 0
            for i in range(256):
                j = (j + state[i] + ord(key[i % len(key)])) % 256
                state[i], state[j] = state[j], state[i]

            # Keystream generation: XOR every input item with one keystream
            # byte. Pieces are collected and joined once at the end instead
            # of growing a string with ``+=``.
            pieces = []
            i = j = 0
            for item in data:
                i = (i + 1) % 256
                j = (j + state[i]) % 256
                state[i], state[j] = state[j], state[i]
                code = item if isinstance(item, int) else ord(item)
                pieces.append(chr(code ^ state[(state[i] + state[j]) % 256]))
            return ''.join(pieces)

        e_code = trans_e(f_code_1, base64.b64decode(bytes(ep, 'ascii')))
        sid, token = e_code.split('_')
        new_ep = trans_e(f_code_2, '%s_%s_%s' % (sid, vid, token))
        return base64.b64encode(bytes(new_ep, 'latin')), sid, token

    @staticmethod
    def parse_m3u8(m3u8):
        """Return the ``http://`` URLs in ``m3u8`` followed by ``?ts_start=0``.

        Only the part before the ``?`` is returned for each match.
        """
        return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8)

    @staticmethod
    def get_vid_from_url(url):
        """Extract the video ID from ``url``.

        The supported URL patterns are tried in order; the first match wins.
        Returns a falsy value when none of them matches.
        """
        return (
            match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or
            match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or
            match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)') or
            match1(url, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)')
        )

    @staticmethod
    def get_playlist_id_from_url(url):
        """Extract the playlist ID from a ``playlist_show`` URL."""
        return match1(url, r'youku\.com/playlist_show/id_([a-zA-Z0-9=]+)')

    def download_playlist_by_url(self, url, **kwargs):
        """Fetch a playlist page and collect the video links it contains.

        Stores ``url`` on the instance, resolves the playlist id, downloads
        the playlist page and gathers the distinct ``v.youku.com`` links.

        NOTE(review): only the beginning of this method is available; see
        the note at the end of the body.
        """
        self.url = url

        playlist_id = self.__class__.get_playlist_id_from_url(self.url)
        if playlist_id is None:
            # NOTE(review): presumably log.wtf aborts the program; if it
            # returns, the request below is made with the id 'None'.
            log.wtf('[Failed] Unsupported URL pattern.')

        video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id)
        # A set removes links that appear more than once on the page.
        videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
        # NOTE(review): the file is cut off here in the middle of the next
        # statement, which begins:
        #     self.title = re.search(r'
        # The remainder of this method is missing and must be restored from
        # the upstream source; nothing has been reconstructed here.