From 9c8d8b0023d8399e047de0ab503e29f12881118b Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Sat, 25 Jun 2016 15:11:43 +0800 Subject: [PATCH 01/19] iqiyi: use html5 to fix #1211 , but lose some stream quality thanks @yan12125 Signed-off-by: Zhang Ning --- src/you_get/extractors/iqiyi.py | 51 ++++++++--------- src/you_get/extractors/iqiyi_sc.py | 92 ++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 28 deletions(-) create mode 100644 src/you_get/extractors/iqiyi_sc.py diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 7431b91f..320520fa 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -9,6 +9,10 @@ from math import floor from zlib import decompress import hashlib +import time + +from .iqiyi_sc import gen_sc + ''' Changelog: -> http://www.iqiyi.com/common/flashplayer/20150916/MainPlayer_5_2_28_c3_3_7_4.swf @@ -43,6 +47,7 @@ bid meaning for quality 10 4k 96 topspeed +''' ''' def mix(tvid): salt = '4a1caba4b4465345366f28da7c117d20' @@ -75,42 +80,32 @@ def getDispathKey(rid): time=json.loads(get_content("http://data.video.qiyi.com/t?tn="+str(random())))["t"] t=str(int(floor(int(time)/(10*60.0)))) return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest() +''' class Iqiyi(VideoExtractor): name = "爱奇艺 (Iqiyi)" stream_types = [ - {'id': '4k', 'container': 'f4v', 'video_profile': '4K'}, - {'id': 'fullhd', 'container': 'f4v', 'video_profile': '全高清'}, - {'id': 'suprt-high', 'container': 'f4v', 'video_profile': '超高清'}, - {'id': 'super', 'container': 'f4v', 'video_profile': '超清'}, - {'id': 'high', 'container': 'f4v', 'video_profile': '高清'}, - {'id': 'standard', 'container': 'f4v', 'video_profile': '标清'}, - {'id': 'topspeed', 'container': 'f4v', 'video_profile': '最差'}, + {'id': 'high', 'container': 'mp4', 'video_profile': '高清'}, + {'id': 'standard', 'container': 'mp4', 'video_profile': '标清'}, ] + supported_stream_types = [ 'high', 'standard'] + + stream_to_bid = { '4k': 10, 'fullhd' : 5, 'suprt-high' : 4, 'super' : 3, 'high' : 2, 'standard' :1, 'topspeed' :96} - stream_urls = { '4k': [] , 'fullhd' : [], 'suprt-high' : [], 'super' : [], 'high' : [], 'standard' :[], 'topspeed' :[]} - - baseurl = '' - - gen_uid = '' - def getVMS(self): + def getVMS(self,rate): #tm ->the flash run time for md5 usage #um -> vip 1 normal 0 #authkey -> for password protected video ,replace '' with your password #puid user.passportid may empty? #TODO: support password protected video tvid, vid = self.vid - tm, sc, src = mix(tvid) - uid = self.gen_uid - vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ - "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ - "&enc="+sc+\ - "&qyid="+uid+"&tn="+str(random()) +"&um=1" +\ - "&authkey="+hashlib.new('md5',bytes(hashlib.new('md5', b'').hexdigest()+str(tm)+tvid,'utf-8')).hexdigest() - return json.loads(get_content(vmsreq)) + t = int(time.time() * 1000) + sc = gen_sc(tvid, t).decode('utf-8') + vmsreq= 'http://cache.m.iqiyi.com/jp/tmts/{}/{}/?platForm=h5&rate={}&tvid={}&vid={}&cupid=qc_100001_100186&type=mp4&olimit=0&agenttype=13&src=d846d0c32d664d32b6b54ea48997a589&sc={}&t={}&__jsT=null'.format(tvid, vid, rate, tvid, vid, sc, t - 7) + return json.loads(get_content(vmsreq)[13:]) def download_playlist_by_url(self, url, **kwargs): self.url = url @@ -134,13 +129,12 @@ class Iqiyi(VideoExtractor): r1(r'data-player-videoid="([^"]+)"', html) self.vid = (tvid, videoid) - self.gen_uid = uuid4().hex - try: - info = self.getVMS() - except: - self.download_playlist_by_url(self.url, **kwargs) - exit(0) - + for stream in self.supported_stream_types: + info = self.getVMS(self.stream_to_bid[stream]) + if info["code"] == "A00000": + self.title = info['data']['playInfo']['vn'] + self.streams[stream] = {'container': 'mp4', 'video_profile': stream, 'src' : [info['data']['m3u']], 'size' : url_size(info['data']['m3u'])} +''' if info["code"] != "A000000": log.e("[error] outdated iQIYI key") log.wtf("is your you-get up-to-date?") @@ -208,6 +202,7 @@ class Iqiyi(VideoExtractor): #because the url is generated before start downloading #and the key may be expired after 10 minutes self.streams[stream_id]['src'] = urls +''' site = Iqiyi() download = site.download_by_url diff --git a/src/you_get/extractors/iqiyi_sc.py b/src/you_get/extractors/iqiyi_sc.py new file mode 100644 index 00000000..4fa4ccdf --- /dev/null +++ b/src/you_get/extractors/iqiyi_sc.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +import binascii +import math +import time + +M = [1732584193, -271733879] +M.extend([~M[0], ~M[1]]) +I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21] +C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432] + + +def L(n, t): + if t is None: + t = 0 + return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1)) + + +def rshift(val, n): + return val >> n if val >= 0 else (val+0x100000000) >> n + + +def trunc(n): + n = n % 0x100000000 + if n > 0x7fffffff: + n -= 0x100000000 + return n + + +def gen_sc(tvid, Z): + def transform(string, mod): + num = int(string, 16) + return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 3) + + C = list(C_base) + o = list(M) + k = str(Z - 7) + for i in range(13): + a = i + C[a >> 2] |= ord(k[a]) << 8 * (a % 4) + + for i in range(16): + a = i + 13 + start = (i >> 2) * 8 + r = '03967743b643f66763d623d637e30733' + C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7) + + for i in range(16): + a = i + 29 + start = (i >> 2) * 8 + r = '7038766939776a32776a32706b337139' + C[a >> 2] |= transform(r[start:start + 8], 1) + + for i in range(9): + a = i + 45 + if i < len(tvid): + C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4) + + for a in range(64): + i = a + I = i >> 4 + C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + rshift(a, 6) + m = L( + L( + o[0], + [ + trunc(o[1] & o[2]) | trunc(~o[1] & o[3]), + trunc(o[3] & o[1]) | trunc(~o[3] & o[2]), + o[1] ^ o[2] ^ o[3], + o[2] ^ trunc(o[1] | ~o[3]) + ][I] + ), + L( + trunc(int(abs(math.sin(i + 1)) * 4294967296)), + C[C_index] if C_index < len(C) else None + ) + ) + I = I_table[4 * I + i % 4] + o = [ + o[3], + L(o[1], trunc(trunc(m << I) | rshift(m, 32 - I))), + o[1], + o[2], + ] + + new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])] + s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)] + return binascii.hexlify(bytes(s))[1::2] + +if __name__ == '__main__': + print(gen_sc("494496100", 1466495259194)) + print(gen_sc("397768800", 1466795077775)) + print(gen_sc("397768800", 1466796325746)) From d8aca8f5427d4e365da817288906cff9dd88bade Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 26 Jun 2016 16:50:28 +0200 Subject: [PATCH 02/19] [bilibili] quick hack for bangumi URLs (fix #1226) --- src/you_get/extractors/bilibili.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index d355eabd..24782598 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -120,6 +120,11 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) + if re.match(r'https?://bangumi\.bilibili\.com/', url): + # quick hack for bangumi URLs + url = r1(r'"([^"]+)" class="v-av-link"', html) + html = get_content(url) + title = r1_of([r'', r']*>([^<>]+)'], html) if title: From d370e9952f46d4e56e2abb6a53c8955c7a2f0d7c Mon Sep 17 00:00:00 2001 From: Chuntao Hong Date: Tue, 28 Jun 2016 16:44:54 +0800 Subject: [PATCH 03/19] enable --socks-proxy --- src/you_get/common.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 100f3869..119640d5 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1071,12 +1071,13 @@ def script_main(script_name, download, download_playlist, **kwargs): -x | --http-proxy Use an HTTP proxy for downloading. -y | --extractor-proxy Use an HTTP proxy for extracting only. --no-proxy Never use a proxy. + -s | --socks-proxy Use an SOCKS5 proxy for downloading. -t | --timeout Set socket timeout. -d | --debug Show traceback and other debug info. ''' - short_opts = 'Vhfiuc:ndF:O:o:p:x:y:t:' - opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=', 'timeout='] + short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:' + opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -1104,6 +1105,7 @@ def script_main(script_name, download, download_playlist, **kwargs): lang = None output_dir = '.' proxy = None + socks_proxy = None extractor_proxy = None traceback = False timeout = 600 @@ -1176,6 +1178,8 @@ def script_main(script_name, download, download_playlist, **kwargs): caption = False elif o in ('-x', '--http-proxy'): proxy = a + elif o in ('-s', '--socks-proxy'): + socks_proxy = a elif o in ('-y', '--extractor-proxy'): extractor_proxy = a elif o in ('--lang',): @@ -1189,7 +1193,21 @@ def script_main(script_name, download, download_playlist, **kwargs): print(help) sys.exit() - set_http_proxy(proxy) + if (socks_proxy): + try: + import socket + import socks + socks_proxy_addrs = socks_proxy.split(':') + socks.set_default_proxy(socks.SOCKS5, + socks_proxy_addrs[0], + int(socks_proxy_addrs[1])) + socket.socket = socks.socksocket + except ImportError: + log.w('Error importing PySocks library, socks proxy ignored.' + 'In order to use use socks proxy, please install PySocks.') + else: + import socket + set_http_proxy(proxy) socket.setdefaulttimeout(timeout) From 8bc81d9a70448e5078af86e07c35b12f3f8b1b6a Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Wed, 29 Jun 2016 20:18:18 +0800 Subject: [PATCH 04/19] iqiyi: support more stream quality algorism form @ERioK thank you @ERioK Signed-off-by: Zhang Ning --- src/you_get/extractors/iqiyi.py | 45 +++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 320520fa..bda2c2e8 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -11,8 +11,6 @@ import hashlib import time -from .iqiyi_sc import gen_sc - ''' Changelog: -> http://www.iqiyi.com/common/flashplayer/20150916/MainPlayer_5_2_28_c3_3_7_4.swf @@ -86,26 +84,31 @@ class Iqiyi(VideoExtractor): name = "爱奇艺 (Iqiyi)" stream_types = [ - {'id': 'high', 'container': 'mp4', 'video_profile': '高清'}, - {'id': 'standard', 'container': 'mp4', 'video_profile': '标清'}, + {'id': 'BD', 'container': 'm3u8', 'video_profile': '全高清'}, + {'id': 'FD', 'container': 'm3u8', 'video_profile': '超高清'}, + {'id': 'TD', 'container': 'm3u8', 'video_profile': '超清'}, + {'id': 'HD', 'container': 'm3u8', 'video_profile': '高清'}, + {'id': 'SD', 'container': 'm3u8', 'video_profile': '标清'}, + {'id': 'LD', 'container': 'm3u8', 'video_profile': '流畅'}, ] - + ''' supported_stream_types = [ 'high', 'standard'] stream_to_bid = { '4k': 10, 'fullhd' : 5, 'suprt-high' : 4, 'super' : 3, 'high' : 2, 'standard' :1, 'topspeed' :96} + ''' + ids = ['BD', 'FD', 'OD', 'TD', 'HD', 'SD', 'LD'] + vd_2_id = {21: 'TD', 2: 'HD', 4: 'FD', 17: 'BD', 96: 'LD', 1: 'SD'} + vd_2_profile = {21: u'超清', 2: u'高清', 4: u'超高清', 17: u'全高清', 96: u'流畅', 1: u'标清'} - def getVMS(self,rate): - #tm ->the flash run time for md5 usage - #um -> vip 1 normal 0 - #authkey -> for password protected video ,replace '' with your password - #puid user.passportid may empty? - #TODO: support password protected video + def getVMS(self): tvid, vid = self.vid t = int(time.time() * 1000) - sc = gen_sc(tvid, t).decode('utf-8') - vmsreq= 'http://cache.m.iqiyi.com/jp/tmts/{}/{}/?platForm=h5&rate={}&tvid={}&vid={}&cupid=qc_100001_100186&type=mp4&olimit=0&agenttype=13&src=d846d0c32d664d32b6b54ea48997a589&sc={}&t={}&__jsT=null'.format(tvid, vid, rate, tvid, vid, sc, t - 7) - return json.loads(get_content(vmsreq)[13:]) + src = '76f90cbd92f94a2e925d83e8ccd22cb7' + key = 'd5fb4bd9d50c4be6948c97edd7254b0e' + sc = hashlib.new('md5', bytes(str(t) + key + vid, 'utf-8')).hexdigest() + vmsreq= url = 'http://cache.m.iqiyi.com/tmts/{0}/{1}/?t={2}&sc={3}&src={4}'.format(tvid,vid,t,sc,src) + return json.loads(get_content(vmsreq)) def download_playlist_by_url(self, url, **kwargs): self.url = url @@ -128,12 +131,16 @@ class Iqiyi(VideoExtractor): r1(r'vid=([^&]+)', self.url) or \ r1(r'data-player-videoid="([^"]+)"', html) self.vid = (tvid, videoid) + self.title = match1(html, '([^<]+)').split('-')[0] + + info = self.getVMS() + assert info['code'] == 'A00000', 'can\'t play this video' + + for stream in info['data']['vidl']: + stream_id = self.vd_2_id[stream['vd']] + stream_profile = self.vd_2_profile[stream['vd']] + self.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0} - for stream in self.supported_stream_types: - info = self.getVMS(self.stream_to_bid[stream]) - if info["code"] == "A00000": - self.title = info['data']['playInfo']['vn'] - self.streams[stream] = {'container': 'mp4', 'video_profile': stream, 'src' : [info['data']['m3u']], 'size' : url_size(info['data']['m3u'])} ''' if info["code"] != "A000000": log.e("[error] outdated iQIYI key") From 5966a090f478b876b31632f2b654f27c382ebdba Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Wed, 29 Jun 2016 15:56:39 -0400 Subject: [PATCH 05/19] [Common, ffmpeg]Exp: Add a ffmpeg downloader and player for any URL --- src/you_get/common.py | 17 +++++++++ src/you_get/processor/ffmpeg.py | 61 +++++++++++++++++++++++++++++++ src/you_get/processor/rtmpdump.py | 1 + 3 files changed, 79 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 119640d5..c0097a4e 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -898,6 +898,23 @@ def download_rtmp_url(url,title, ext,params={}, total_size=0, output_dir='.', re assert has_rtmpdump_installed(), "RTMPDump not installed." download_rtmpdump_stream(url, title, ext,params, output_dir) +def download_url_ffmpeg(url,title, ext,params={}, total_size=0, output_dir='.', refer=None, merge=True, faker=False): + assert url + if dry_run: + print('Real URL:\n%s\n' % [url]) + if params.get("-y",False): #None or unset ->False + print('Real Playpath:\n%s\n' % [params.get("-y")]) + return + + if player: + from .processor.ffmpeg import ffmpeg_play_stream + ffmpeg_play_stream(player, url, params) + return + + from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_streaming + assert has_ffmpeg_installed(), "FFmpeg not installed." + ffmpeg_download_stream(url, title, ext, params, output_dir) + def playlist_not_supported(name): def f(*args, **kwargs): raise NotImplementedError('Playlist is not supported for ' + name) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index e7ee35d6..1d5d850c 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -199,3 +199,64 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): for file in files: os.remove(file + '.ts') return True + +def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): + """str, str->True + WARNING: NOT THE SAME PARMS AS OTHER FUNCTIONS!!!!!! + You can basicly download anything with this function + but better leave it alone with + """ + output = title + '.' + ext + + if not (output_dir == '.'): + output = output_dir + output + + ffmpeg_params = [] + #should these exist... + if len(params) > 0: + for k, v in params: + ffmpeg_params.append(k) + ffmpeg_params.append(v) + + print('Downloading streaming content with FFmpeg, press Ctrl+C to stop recording...') + ffmpeg_params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] + ffmpeg_params.append(files) #not the same here!!!! + + if FFMPEG == 'avconv': #who cares? + ffmpeg_params += ['-c', 'copy', output] + else: + ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', '-bsf:v', 'h264_mp4toannexb', output] + + ffmpeg_params.append(output) + + subprocess.call(ffmpeg_params) + + return True + +# +#To be refactor +#Direct copy of rtmpdump.py +# +def ffmpeg_play_stream(player, url, params={}): + ffmpeg_params = [] + #should these exist... + if len(params) > 0: + for k, v in params: + ffmpeg_params.append(k) + ffmpeg_params.append(v) + + print('Playing streaming content with FFmpeg, press Ctrl+C to stop recording...') + ffmpeg_params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] + ffmpeg_params.append(url) #not the same here!!!! + + if FFMPEG == 'avconv': #who cares? + ffmpeg_params += ['-c', 'copy', '|'] + else: + ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', '-bsf:v', 'h264_mp4toannexb', '|'] + + ffmpeg_params += [player, '-'] + + print(' '.join(ffmpeg_params)) + + subprocess.call(ffmpeg_params) + return diff --git a/src/you_get/processor/rtmpdump.py b/src/you_get/processor/rtmpdump.py index aadb6887..cf5f822c 100644 --- a/src/you_get/processor/rtmpdump.py +++ b/src/you_get/processor/rtmpdump.py @@ -43,6 +43,7 @@ def download_rtmpdump_stream(url, title, ext,params={},output_dir='.'): # #To be refactor +#To the future myself: Remember to refactor the same function in ffmpeg.py # def play_rtmpdump_stream(player, url, params={}): cmdline="rtmpdump -r '%s' "%url From 0d06c260c0ca535d7652e50c60f676f0bdceba14 Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Wed, 29 Jun 2016 21:28:49 -0400 Subject: [PATCH 06/19] [iQiyi]try to fix CI complaint --- src/you_get/extractors/iqiyi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index bda2c2e8..4544552d 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -137,9 +137,9 @@ class Iqiyi(VideoExtractor): assert info['code'] == 'A00000', 'can\'t play this video' for stream in info['data']['vidl']: - stream_id = self.vd_2_id[stream['vd']] - stream_profile = self.vd_2_profile[stream['vd']] - self.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0} + stream_id = self.vd_2_id[stream['vd']] + stream_profile = self.vd_2_profile[stream['vd']] + self.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0} ''' if info["code"] != "A000000": From 2d542c2cd3b2b6a56886342f53ceb476a08818eb Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Wed, 29 Jun 2016 21:54:46 -0400 Subject: [PATCH 07/19] [iQiyi]CI complaint CI complaint go away --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 4544552d..de8756bf 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -99,7 +99,7 @@ class Iqiyi(VideoExtractor): ''' ids = ['BD', 'FD', 'OD', 'TD', 'HD', 'SD', 'LD'] vd_2_id = {21: 'TD', 2: 'HD', 4: 'FD', 17: 'BD', 96: 'LD', 1: 'SD'} - vd_2_profile = {21: u'超清', 2: u'高清', 4: u'超高清', 17: u'全高清', 96: u'流畅', 1: u'标清'} + vd_2_profile = {21: '超清', 2: '高清', 4: '超高清', 17: '全高清', 96: '流畅', 1: '标清'} def getVMS(self): tvid, vid = self.vid From fd93b0380d0c04c4766e520e53a6256437e39067 Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Wed, 29 Jun 2016 23:15:28 -0400 Subject: [PATCH 08/19] [FFmpeg] Change arguments to record M3U --- src/you_get/processor/ffmpeg.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 1d5d850c..a387be0a 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -219,16 +219,18 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): ffmpeg_params.append(v) print('Downloading streaming content with FFmpeg, press Ctrl+C to stop recording...') - ffmpeg_params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] + ffmpeg_params = [FFMPEG] + ['-y', '-i'] ffmpeg_params.append(files) #not the same here!!!! if FFMPEG == 'avconv': #who cares? ffmpeg_params += ['-c', 'copy', output] else: - ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', '-bsf:v', 'h264_mp4toannexb', output] + ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc'] ffmpeg_params.append(output) + print(' '.join(ffmpeg_params)) + subprocess.call(ffmpeg_params) return True @@ -252,7 +254,7 @@ def ffmpeg_play_stream(player, url, params={}): if FFMPEG == 'avconv': #who cares? ffmpeg_params += ['-c', 'copy', '|'] else: - ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', '-bsf:v', 'h264_mp4toannexb', '|'] + ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', '|'] ffmpeg_params += [player, '-'] From 18bc44ea4155d979f8e5fb864e2233d9b3f36eda Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Wed, 29 Jun 2016 23:42:19 -0400 Subject: [PATCH 09/19] [FFmpeg] Fix stream corrupted if Ctrl+C Use q instead. --- src/you_get/processor/ffmpeg.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index a387be0a..c23b0eaf 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -218,8 +218,8 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): ffmpeg_params.append(k) ffmpeg_params.append(v) - print('Downloading streaming content with FFmpeg, press Ctrl+C to stop recording...') - ffmpeg_params = [FFMPEG] + ['-y', '-i'] + print('Downloading streaming content with FFmpeg, press q to stop recording...') + ffmpeg_params = [FFMPEG] + ['-y', '-re', '-i'] ffmpeg_params.append(files) #not the same here!!!! if FFMPEG == 'avconv': #who cares? @@ -231,7 +231,14 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): print(' '.join(ffmpeg_params)) - subprocess.call(ffmpeg_params) + try: + a = subprocess.Popen(ffmpeg_params, stdin= subprocess.PIPE) + a.communicate() + except KeyboardInterrupt: + try: + a.stdin.write('q'.encode('utf-8')) + except: + pass return True @@ -247,8 +254,8 @@ def ffmpeg_play_stream(player, url, params={}): ffmpeg_params.append(k) ffmpeg_params.append(v) - print('Playing streaming content with FFmpeg, press Ctrl+C to stop recording...') - ffmpeg_params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] + print('Playing streaming content with FFmpeg, press 1 to stop recording...') + ffmpeg_params = [FFMPEG] + LOGLEVEL + ['-y', '-re', '-i'] ffmpeg_params.append(url) #not the same here!!!! if FFMPEG == 'avconv': #who cares? @@ -260,5 +267,13 @@ def ffmpeg_play_stream(player, url, params={}): print(' '.join(ffmpeg_params)) - subprocess.call(ffmpeg_params) - return + try: + a = subprocess.Popen(ffmpeg_params, stdin= subprocess.PIPE) + a.communicate() + except KeyboardInterrupt: + try: + a.stdin.write('q'.encode('utf-8')) + except: + pass + + return True \ No newline at end of file From 62a9e35ed0cc0aebb9ef13edb76394c9efd543e6 Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Thu, 30 Jun 2016 02:36:07 -0400 Subject: [PATCH 10/19] [CNTV] Fix CCAV domain, Add pattern, fix #1193 ,#909 , replace #1031 --- src/you_get/common.py | 1 + src/you_get/extractors/cntv.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 119640d5..d30397c7 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -9,6 +9,7 @@ SITES = { 'bandcamp' : 'bandcamp', 'baomihua' : 'baomihua', 'bilibili' : 'bilibili', + 'cctv' : 'cntv', 'cntv' : 'cntv', 'cbs' : 'cbs', 'dailymotion' : 'dailymotion', diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index cfd96e59..a32808cb 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -7,6 +7,7 @@ from ..common import * import json import re + def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): assert id info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id)) @@ -31,7 +32,12 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url): id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)') - elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url): + elif re.match(r'http://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): + html = get_content(url) + id = match1(html, r'guid = \"(.+)\"') + elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \ + re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \ + re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url): id = r1(r'videoCenterId","(\w+)"', get_html(url)) elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) From 48159051de49b16b4d3b721baa076c9416dbfe75 Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Thu, 30 Jun 2016 02:43:41 -0400 Subject: [PATCH 11/19] [CNTV] reduce code --- src/you_get/extractors/cntv.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index a32808cb..e25fa961 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -32,12 +32,11 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url): id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)') - elif re.match(r'http://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): - html = get_content(url) - id = match1(html, r'guid = \"(.+)\"') elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \ re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \ - re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url): + re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \ + re.match(r'http://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \ + re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): id = r1(r'videoCenterId","(\w+)"', get_html(url)) elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) From 78ee9fffe32f4738264554b854c99a572b7fa4f4 Mon Sep 17 00:00:00 2001 From: Zhang Ning <zhangn1985@gmail.com> Date: Wed, 29 Jun 2016 20:18:18 +0800 Subject: [PATCH 12/19] iqiyi: support more stream quality algorism form @ERioK thank you @ERioK v2. remove iqiyi_sc.py, and remove Unicode literal syntax v3. indent mistake v4. support vip streams v5. combine stream profile by size v6. support 4k profiles Signed-off-by: Zhang Ning <zhangn1985@gmail.com> --- src/you_get/extractors/iqiyi.py | 81 +++++++++++++++++++------- src/you_get/extractors/iqiyi_sc.py | 92 ------------------------------ 2 files changed, 60 insertions(+), 113 deletions(-) delete mode 100644 src/you_get/extractors/iqiyi_sc.py diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 320520fa..9761f3d1 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -8,11 +8,10 @@ import json from math import floor from zlib import decompress import hashlib +from ..util import log import time -from .iqiyi_sc import gen_sc - ''' Changelog: -> http://www.iqiyi.com/common/flashplayer/20150916/MainPlayer_5_2_28_c3_3_7_4.swf @@ -81,31 +80,36 @@ def getDispathKey(rid): t=str(int(floor(int(time)/(10*60.0)))) return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest() ''' +def getVMS(tvid, vid): + t = int(time.time() * 1000) + src = '76f90cbd92f94a2e925d83e8ccd22cb7' + key = 'd5fb4bd9d50c4be6948c97edd7254b0e' + sc = hashlib.new('md5', bytes(str(t) + key + vid, 'utf-8')).hexdigest() + vmsreq= url = 'http://cache.m.iqiyi.com/tmts/{0}/{1}/?t={2}&sc={3}&src={4}'.format(tvid,vid,t,sc,src) + return json.loads(get_content(vmsreq)) class Iqiyi(VideoExtractor): name = "爱奇艺 (Iqiyi)" stream_types = [ - {'id': 'high', 'container': 'mp4', 'video_profile': '高清'}, - {'id': 'standard', 'container': 'mp4', 'video_profile': '标清'}, + {'id': '4k', 'container': 'm3u8', 'video_profile': '4k'}, + {'id': 'BD', 'container': 'm3u8', 'video_profile': '1080p'}, + {'id': 'TD', 'container': 'm3u8', 'video_profile': '720p'}, + {'id': 'HD', 'container': 'm3u8', 'video_profile': '540p'}, + {'id': 'SD', 'container': 'm3u8', 'video_profile': '360p'}, + {'id': 'LD', 'container': 'm3u8', 'video_profile': '210p'}, ] - + ''' supported_stream_types = [ 'high', 'standard'] stream_to_bid = { '4k': 10, 'fullhd' : 5, 'suprt-high' : 4, 'super' : 3, 'high' : 2, 'standard' :1, 'topspeed' :96} + ''' + ids = ['4k','BD', 'TD', 'HD', 'SD', 'LD'] + vd_2_id = {10: '4k', 19: '4k', 5:'BD', 18: 'BD', 21: 'HD', 2: 'HD', 4: 'TD', 17: 'TD', 96: 'LD', 1: 'SD'} + id_2_profile = {'4k':'4k', 'BD': '1080p','TD': '720p', 'HD': '540p', 'SD': '360p', 'LD': '210p'} + - def getVMS(self,rate): - #tm ->the flash run time for md5 usage - #um -> vip 1 normal 0 - #authkey -> for password protected video ,replace '' with your password - #puid user.passportid may empty? - #TODO: support password protected video - tvid, vid = self.vid - t = int(time.time() * 1000) - sc = gen_sc(tvid, t).decode('utf-8') - vmsreq= 'http://cache.m.iqiyi.com/jp/tmts/{}/{}/?platForm=h5&rate={}&tvid={}&vid={}&cupid=qc_100001_100186&type=mp4&olimit=0&agenttype=13&src=d846d0c32d664d32b6b54ea48997a589&sc={}&t={}&__jsT=null'.format(tvid, vid, rate, tvid, vid, sc, t - 7) - return json.loads(get_content(vmsreq)[13:]) def download_playlist_by_url(self, url, **kwargs): self.url = url @@ -128,12 +132,47 @@ class Iqiyi(VideoExtractor): r1(r'vid=([^&]+)', self.url) or \ r1(r'data-player-videoid="([^"]+)"', html) self.vid = (tvid, videoid) + self.title = match1(html, '<title>([^<]+)').split('-')[0] + tvid, videoid = self.vid + info = getVMS(tvid, videoid) + assert info['code'] == 'A00000', 'can\'t play this video' - for stream in self.supported_stream_types: - info = self.getVMS(self.stream_to_bid[stream]) - if info["code"] == "A00000": - self.title = info['data']['playInfo']['vn'] - self.streams[stream] = {'container': 'mp4', 'video_profile': stream, 'src' : [info['data']['m3u']], 'size' : url_size(info['data']['m3u'])} + for stream in info['data']['vidl']: + try: + stream_id = self.vd_2_id[stream['vd']] + if stream_id in self.stream_types: + continue + stream_profile = self.id_2_profile[stream_id] + self.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0} + except: + log.i("vd: {} is not handled".format(stream['vd'])) + log.i("info is {}".format(stream)) + # why I need do below??? + if not 'BD' in self.stream_types: + p1080_vids = [] + if 18 in info['data']['ctl']['vip']['bids']: + p1080_vids.append(info['data']['ctl']['configs']['18']['vid']) + if 5 in info['data']['ctl']['vip']['bids']: + p1080_vids.append(info['data']['ctl']['configs']['5']['vid']) + for v in p1080_vids: + p1080_info = getVMS(tvid, v) + if info['code'] == 'A00000': + p1080_url = p1080_info['data']['m3u'] + self.streams['BD'] = {'video_profile': '1080p', 'container': 'm3u8', 'src': [p1080_url], 'size' : 0} + break + + if not '4k' in self.stream_types: + k4_vids = [] + if 19 in info['data']['ctl']['vip']['bids']: + k4_vids.append(info['data']['ctl']['configs']['19']['vid']) + if 10 in info['data']['ctl']['vip']['bids']: + k4_vids.append(info['data']['ctl']['configs']['10']['vid']) + for v in k4_vids: + k4_info = getVMS(tvid, v) + if info['code'] == 'A00000': + k4_url = k4_info['data']['m3u'] + self.streams['4k'] = {'video_profile': '4k', 'container': 'm3u8', 'src': [k4_url], 'size' : 0} + break ''' if info["code"] != "A000000": log.e("[error] outdated iQIYI key") diff --git a/src/you_get/extractors/iqiyi_sc.py b/src/you_get/extractors/iqiyi_sc.py deleted file mode 100644 index 4fa4ccdf..00000000 --- a/src/you_get/extractors/iqiyi_sc.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python -import binascii -import math -import time - -M = [1732584193, -271733879] -M.extend([~M[0], ~M[1]]) -I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21] -C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432] - - -def L(n, t): - if t is None: - t = 0 - return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1)) - - -def rshift(val, n): - return val >> n if val >= 0 else (val+0x100000000) >> n - - -def trunc(n): - n = n % 0x100000000 - if n > 0x7fffffff: - n -= 0x100000000 - return n - - -def gen_sc(tvid, Z): - def transform(string, mod): - num = int(string, 16) - return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 3) - - C = list(C_base) - o = list(M) - k = str(Z - 7) - for i in range(13): - a = i - C[a >> 2] |= ord(k[a]) << 8 * (a % 4) - - for i in range(16): - a = i + 13 - start = (i >> 2) * 8 - r = '03967743b643f66763d623d637e30733' - C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7) - - for i in range(16): - a = i + 29 - start = (i >> 2) * 8 - r = '7038766939776a32776a32706b337139' - C[a >> 2] |= transform(r[start:start + 8], 1) - - for i in range(9): - a = i + 45 - if i < len(tvid): - C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4) - - for a in range(64): - i = a - I = i >> 4 - C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + rshift(a, 6) - m = L( - L( - o[0], - [ - trunc(o[1] & o[2]) | trunc(~o[1] & o[3]), - trunc(o[3] & o[1]) | trunc(~o[3] & o[2]), - o[1] ^ o[2] ^ o[3], - o[2] ^ trunc(o[1] | ~o[3]) - ][I] - ), - L( - trunc(int(abs(math.sin(i + 1)) * 4294967296)), - C[C_index] if C_index < len(C) else None - ) - ) - I = I_table[4 * I + i % 4] - o = [ - o[3], - L(o[1], trunc(trunc(m << I) | rshift(m, 32 - I))), - o[1], - o[2], - ] - - new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])] - s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)] - return binascii.hexlify(bytes(s))[1::2] - -if __name__ == '__main__': - print(gen_sc("494496100", 1466495259194)) - print(gen_sc("397768800", 1466795077775)) - print(gen_sc("397768800", 1466796325746)) From 7452a4bb0e7aac32db89d871e7636741fddf641a Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Fri, 1 Jul 2016 00:38:26 -0400 Subject: [PATCH 13/19] [iQiyi] fix for PR conflict --- src/you_get/extractors/iqiyi.py | 78 +++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 23 deletions(-) mode change 100644 => 100755 src/you_get/extractors/iqiyi.py diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py old mode 100644 new mode 100755 index de8756bf..a1552fe4 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -8,6 +8,7 @@ import json from math import floor from zlib import decompress import hashlib +from ..util import log import time @@ -79,17 +80,24 @@ def getDispathKey(rid): t=str(int(floor(int(time)/(10*60.0)))) return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest() ''' +def getVMS(tvid, vid): + t = int(time.time() * 1000) + src = '76f90cbd92f94a2e925d83e8ccd22cb7' + key = 'd5fb4bd9d50c4be6948c97edd7254b0e' + sc = hashlib.new('md5', bytes(str(t) + key + vid, 'utf-8')).hexdigest() + vmsreq= url = 'http://cache.m.iqiyi.com/tmts/{0}/{1}/?t={2}&sc={3}&src={4}'.format(tvid,vid,t,sc,src) + return json.loads(get_content(vmsreq)) class Iqiyi(VideoExtractor): name = "爱奇艺 (Iqiyi)" stream_types = [ - {'id': 'BD', 'container': 'm3u8', 'video_profile': '全高清'}, - {'id': 'FD', 'container': 'm3u8', 'video_profile': '超高清'}, - {'id': 'TD', 'container': 'm3u8', 'video_profile': '超清'}, - {'id': 'HD', 'container': 'm3u8', 'video_profile': '高清'}, - {'id': 'SD', 'container': 'm3u8', 'video_profile': '标清'}, - {'id': 'LD', 'container': 'm3u8', 'video_profile': '流畅'}, + {'id': '4k', 'container': 'm3u8', 'video_profile': '4k'}, + {'id': 'BD', 'container': 'm3u8', 'video_profile': '1080p'}, + {'id': 'TD', 'container': 'm3u8', 'video_profile': '720p'}, + {'id': 'HD', 'container': 'm3u8', 'video_profile': '540p'}, + {'id': 'SD', 'container': 'm3u8', 'video_profile': '360p'}, + {'id': 'LD', 'container': 'm3u8', 'video_profile': '210p'}, ] ''' supported_stream_types = [ 'high', 'standard'] @@ -97,18 +105,11 @@ class Iqiyi(VideoExtractor): stream_to_bid = { '4k': 10, 'fullhd' : 5, 'suprt-high' : 4, 'super' : 3, 'high' : 2, 'standard' :1, 'topspeed' :96} ''' - ids = ['BD', 'FD', 'OD', 'TD', 'HD', 'SD', 'LD'] - vd_2_id = {21: 'TD', 2: 'HD', 4: 'FD', 17: 'BD', 96: 'LD', 1: 'SD'} - vd_2_profile = {21: '超清', 2: '高清', 4: '超高清', 17: '全高清', 96: '流畅', 1: '标清'} + ids = ['4k','BD', 'TD', 'HD', 'SD', 'LD'] + vd_2_id = {10: '4k', 19: '4k', 5:'BD', 18: 'BD', 21: 'HD', 2: 'HD', 4: 'TD', 17: 'TD', 96: 'LD', 1: 'SD'} + id_2_profile = {'4k':'4k', 'BD': '1080p','TD': '720p', 'HD': '540p', 'SD': '360p', 'LD': '210p'} + - def getVMS(self): - tvid, vid = self.vid - t = int(time.time() * 1000) - src = '76f90cbd92f94a2e925d83e8ccd22cb7' - key = 'd5fb4bd9d50c4be6948c97edd7254b0e' - sc = hashlib.new('md5', bytes(str(t) + key + vid, 'utf-8')).hexdigest() - vmsreq= url = 'http://cache.m.iqiyi.com/tmts/{0}/{1}/?t={2}&sc={3}&src={4}'.format(tvid,vid,t,sc,src) - return json.loads(get_content(vmsreq)) def download_playlist_by_url(self, url, **kwargs): self.url = url @@ -132,15 +133,46 @@ class Iqiyi(VideoExtractor): r1(r'data-player-videoid="([^"]+)"', html) self.vid = (tvid, videoid) self.title = match1(html, '<title>([^<]+)').split('-')[0] - - info = self.getVMS() + tvid, videoid = self.vid + info = getVMS(tvid, videoid) assert info['code'] == 'A00000', 'can\'t play this video' for stream in info['data']['vidl']: - stream_id = self.vd_2_id[stream['vd']] - stream_profile = self.vd_2_profile[stream['vd']] - self.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0} + try: + stream_id = self.vd_2_id[stream['vd']] + if stream_id in self.stream_types: + continue + stream_profile = self.id_2_profile[stream_id] + self.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0} + except: + log.i("vd: {} is not handled".format(stream['vd'])) + log.i("info is {}".format(stream)) + # why I need do below??? + if not 'BD' in self.stream_types: + p1080_vids = [] + if 18 in info['data']['ctl']['vip']['bids']: + p1080_vids.append(info['data']['ctl']['configs']['18']['vid']) + if 5 in info['data']['ctl']['vip']['bids']: + p1080_vids.append(info['data']['ctl']['configs']['5']['vid']) + for v in p1080_vids: + p1080_info = getVMS(tvid, v) + if info['code'] == 'A00000': + p1080_url = p1080_info['data']['m3u'] + self.streams['BD'] = {'video_profile': '1080p', 'container': 'm3u8', 'src': [p1080_url], 'size' : 0} + break + if not '4k' in self.stream_types: + k4_vids = [] + if 19 in info['data']['ctl']['vip']['bids']: + k4_vids.append(info['data']['ctl']['configs']['19']['vid']) + if 10 in info['data']['ctl']['vip']['bids']: + k4_vids.append(info['data']['ctl']['configs']['10']['vid']) + for v in k4_vids: + k4_info = getVMS(tvid, v) + if info['code'] == 'A00000': + k4_url = k4_info['data']['m3u'] + self.streams['4k'] = {'video_profile': '4k', 'container': 'm3u8', 'src': [k4_url], 'size' : 0} + break ''' if info["code"] != "A000000": log.e("[error] outdated iQIYI key") @@ -214,4 +246,4 @@ class Iqiyi(VideoExtractor): site = Iqiyi() download = site.download_by_url iqiyi_download_by_vid = site.download_by_vid -download_playlist = site.download_playlist_by_url +download_playlist = site.download_playlist_by_url \ No newline at end of file From 5a99b2f42f1157c6b51be768d69fb771b8761405 Mon Sep 17 00:00:00 2001 From: David Zhuang <david.zhuang@mail.utoronto.ca> Date: Fri, 1 Jul 2016 01:07:32 -0400 Subject: [PATCH 14/19] [Showroom] Add support via HLS M3U --- src/you_get/common.py | 3 +- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/showroom.py | 67 ++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100755 src/you_get/extractors/showroom.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 99ce14c7..c8843ddf 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -62,6 +62,7 @@ SITES = { 'pptv' : 'pptv', 'qianmo' : 'qianmo', 'qq' : 'qq', + 'showroom-live' : 'showroom', 'sina' : 'sina', 'smgbb' : 'bilibili', 'sohu' : 'sohu', @@ -912,7 +913,7 @@ def download_url_ffmpeg(url,title, ext,params={}, total_size=0, output_dir='.', ffmpeg_play_stream(player, url, params) return - from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_streaming + from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream assert has_ffmpeg_installed(), "FFmpeg not installed." ffmpeg_download_stream(url, title, ext, params, output_dir) diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 20a7f7cf..b879b83e 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -55,6 +55,7 @@ from .pptv import * from .qianmo import * from .qie import * from .qq import * +from .showroom import * from .sina import * from .sohu import * from .soundcloud import * diff --git a/src/you_get/extractors/showroom.py b/src/you_get/extractors/showroom.py new file mode 100755 index 00000000..43d7b192 --- /dev/null +++ b/src/you_get/extractors/showroom.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +__all__ = ['showroom_download'] + +from ..common import * +import urllib.error +from json import loads +from time import time + +#---------------------------------------------------------------------- +def showroom_get_roomid_by_room_url_key(room_url_key): + """str->str""" + fake_headers_mobile = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'UTF-8,*;q=0.5', + 'Accept-Encoding': 'gzip,deflate,sdch', + 'Accept-Language': 'en-US,en;q=0.8', + 'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36' + } + webpage_url = 'https://www.showroom-live.com/' + room_url_key + html = get_content(webpage_url, headers = fake_headers_mobile) + roomid = match1(html, r'room\?room_id\=(\d+)') + assert roomid + return roomid + +def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_only = False, **kwargs): + '''Source: Android mobile''' + timestamp = str(int(time() * 1000)) + api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp) + html = get_content(api_endpoint) + html = json.loads(html) + #{'streaming_url_list': [{'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 1, 'label': 'original spec(low latency)', 'is_default': True, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed/playlist.m3u8', 'is_default': True, 'id': 2, 'type': 'hls', 'label': 'original spec'}, {'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 3, 'label': 'low spec(low latency)', 'is_default': False, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low/playlist.m3u8', 'is_default': False, 'id': 4, 'type': 'hls', 'label': 'low spec'}]} + if len(html) < 1: + log.wtf('Cannot find any live URL! Maybe the live have ended or haven\'t start yet?') + + #This is mainly for testing the M3U FFmpeg parser so I would ignore any non-m3u ones + stream_url = [i['url'] for i in html['streaming_url_list'] if i['is_default'] and i['type'] == 'hls'][0] + + assert stream_url + + #title + title = '' + profile_api = 'https://www.showroom-live.com/api/room/profile?room_id={room_id}'.format(room_id = room_id) + html = loads(get_content(profile_api)) + try: + title = html['main_name'] + except KeyError: + title = 'Showroom_{room_id}'.format(room_id = room_id) + + type_, ext, size = url_info(stream_url) + print_info(site_info, title, type_, size) + if not info_only: + download_url_ffmpeg(url=stream_url, title=title, ext= 'mp4', output_dir=output_dir) + + +#---------------------------------------------------------------------- +def showroom_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): + """""" + if re.match( r'(\w+)://www.showroom-live.com/(\w*)', url): + room_url_key = match1(url, r'\w+://www.showroom-live.com/(\w*)') + room_id = showroom_get_roomid_by_room_url_key(room_url_key) + showroom_download_by_room_id(room_id, output_dir, merge, + info_only) + +site_info = "Showroom" +download = showroom_download +download_playlist = playlist_not_supported('showroom') \ No newline at end of file From bbe1376a8670060f51803ed11d0027a3ab9792da Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Fri, 1 Jul 2016 15:35:02 +0200 Subject: [PATCH 15/19] [showroom] support room URLs with dashes (-) --- src/you_get/extractors/showroom.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/showroom.py b/src/you_get/extractors/showroom.py index 43d7b192..d0f636bc 100755 --- a/src/you_get/extractors/showroom.py +++ b/src/you_get/extractors/showroom.py @@ -56,8 +56,8 @@ def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_ #---------------------------------------------------------------------- def showroom_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): """""" - if re.match( r'(\w+)://www.showroom-live.com/(\w*)', url): - room_url_key = match1(url, r'\w+://www.showroom-live.com/(\w*)') + if re.match( r'(\w+)://www.showroom-live.com/([-\w]+)', url): + room_url_key = match1(url, r'\w+://www.showroom-live.com/([-\w]+)') room_id = showroom_get_roomid_by_room_url_key(room_url_key) showroom_download_by_room_id(room_id, output_dir, merge, info_only) From 7516028dd860d642bb1620fef0d78ed6a06153d8 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Fri, 1 Jul 2016 15:58:57 +0200 Subject: [PATCH 16/19] =?UTF-8?q?extractors:=20revert=20permissions=20(100?= =?UTF-8?q?755=20=E2=86=92=20100644)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/extractors/__init__.py | 0 src/you_get/extractors/acfun.py | 0 src/you_get/extractors/baidu.py | 0 src/you_get/extractors/baomihua.py | 0 src/you_get/extractors/dilidili.py | 0 src/you_get/extractors/funshion.py | 0 src/you_get/extractors/iqiyi.py | 0 src/you_get/extractors/khan.py | 0 src/you_get/extractors/miomio.py | 0 src/you_get/extractors/showroom.py | 0 src/you_get/extractors/yixia.py | 0 11 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 src/you_get/extractors/__init__.py mode change 100755 => 100644 src/you_get/extractors/acfun.py mode change 100755 => 100644 src/you_get/extractors/baidu.py mode change 100755 => 100644 src/you_get/extractors/baomihua.py mode change 100755 => 100644 src/you_get/extractors/dilidili.py mode change 100755 => 100644 src/you_get/extractors/funshion.py mode change 100755 => 100644 src/you_get/extractors/iqiyi.py mode change 100755 => 100644 src/you_get/extractors/khan.py mode change 100755 => 100644 src/you_get/extractors/miomio.py mode change 100755 => 100644 src/you_get/extractors/showroom.py mode change 100755 => 100644 src/you_get/extractors/yixia.py diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/baomihua.py b/src/you_get/extractors/baomihua.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/dilidili.py b/src/you_get/extractors/dilidili.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/funshion.py b/src/you_get/extractors/funshion.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/khan.py b/src/you_get/extractors/khan.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/miomio.py b/src/you_get/extractors/miomio.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/showroom.py b/src/you_get/extractors/showroom.py old mode 100755 new mode 100644 diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py old mode 100755 new mode 100644 From e84810c4d2c1242bbe14265919f8627b9c1e7e54 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Fri, 1 Jul 2016 16:42:04 +0200 Subject: [PATCH 17/19] [facebook] use hd_src whenever possible and remove duplicates --- src/you_get/extractors/facebook.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py index 72ce8f31..2a96fcb0 100644 --- a/src/you_get/extractors/facebook.py +++ b/src/you_get/extractors/facebook.py @@ -9,17 +9,22 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs html = get_html(url) title = r1(r'<title id="pageTitle">(.+)', html) - sd_urls = [ + sd_urls = list(set([ unicodize(str.replace(i, '\\/', '/')) for i in re.findall(r'"sd_src_no_ratelimit":"([^"]*)"', html) - ] + ])) + hd_urls = list(set([ + unicodize(str.replace(i, '\\/', '/')) + for i in re.findall(r'"hd_src_no_ratelimit":"([^"]*)"', html) + ])) + urls = hd_urls if hd_urls else sd_urls - type, ext, size = url_info(sd_urls[0], True) - size = urls_size(sd_urls) + type, ext, size = url_info(urls[0], True) + size = urls_size(urls) print_info(site_info, title, type, size) if not info_only: - download_urls(sd_urls, title, ext, size, output_dir, merge=False) + download_urls(urls, title, ext, size, output_dir, merge=False) site_info = "Facebook.com" download = facebook_download From 17386968c7315347ef7384f1a895afbf44d0b6fe Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 1 Jul 2016 20:28:03 +0200 Subject: [PATCH 18/19] [tumblr] fix #1232 --- src/you_get/extractors/tumblr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index fea061ce..0c59f25a 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -68,7 +68,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): real_url = r1(r']+tumblr_video_container[^>]+>]+src=[\'"]([^\'"]*)[\'"]', html) - if len(iframe_url) > 0: + if iframe_url: iframe_html = get_content(iframe_url, headers=fake_headers) real_url = r1(r']*>[\n ]*]+src=[\'"]([^\'"]*)[\'"]', iframe_html) else: From a4ed7859661e8bc9d7c9eab1a0c04eaf8fc7836d Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Sat, 2 Jul 2016 06:54:35 +0800 Subject: [PATCH 19/19] iqiyi: fix when no vip info Signed-off-by: Zhang Ning --- src/you_get/extractors/iqiyi.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index a1552fe4..dc9b05fd 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -148,12 +148,17 @@ class Iqiyi(VideoExtractor): log.i("vd: {} is not handled".format(stream['vd'])) log.i("info is {}".format(stream)) # why I need do below??? - if not 'BD' in self.stream_types: + try: + vip_vds = info['data']['ctl']['vip']['bids'] + vip_conf = info['data']['ctl']['configs'] + except: + return + if not 'BD' in self.streams.keys(): p1080_vids = [] - if 18 in info['data']['ctl']['vip']['bids']: - p1080_vids.append(info['data']['ctl']['configs']['18']['vid']) - if 5 in info['data']['ctl']['vip']['bids']: - p1080_vids.append(info['data']['ctl']['configs']['5']['vid']) + if 18 in vip_vds: + p1080_vids.append(vip_conf['18']['vid']) + if 5 in vip_vds: + p1080_vids.append(vip_conf['5']['vid']) for v in p1080_vids: p1080_info = getVMS(tvid, v) if info['code'] == 'A00000': @@ -161,12 +166,12 @@ class Iqiyi(VideoExtractor): self.streams['BD'] = {'video_profile': '1080p', 'container': 'm3u8', 'src': [p1080_url], 'size' : 0} break - if not '4k' in self.stream_types: + if not '4k' in self.streams.keys(): k4_vids = [] - if 19 in info['data']['ctl']['vip']['bids']: - k4_vids.append(info['data']['ctl']['configs']['19']['vid']) - if 10 in info['data']['ctl']['vip']['bids']: - k4_vids.append(info['data']['ctl']['configs']['10']['vid']) + if 19 in vip_vds: + k4_vids.append(vip_conf['19']['vid']) + if 10 in vip_vds: + k4_vids.append(vip_conf['10']['vid']) for v in k4_vids: k4_info = getVMS(tvid, v) if info['code'] == 'A00000': @@ -246,4 +251,4 @@ class Iqiyi(VideoExtractor): site = Iqiyi() download = site.download_by_url iqiyi_download_by_vid = site.download_by_vid -download_playlist = site.download_playlist_by_url \ No newline at end of file +download_playlist = site.download_playlist_by_url