From ef16e74fe72e7779a0b10af56f435cb2558d90c7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 20 Dec 2014 03:08:40 +0100 Subject: [PATCH 01/57] Tudou: fix #460 --- src/you_get/extractors/tudou.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index a9f78a6d..e7bc7e9f 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -29,7 +29,7 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html) tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) -def tudou_download(url, output_dir = '.', merge = True, info_only = False): +def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): # Embedded player id = r1(r'http://www.tudou.com/v/([^/]+)/', url) if id: @@ -44,7 +44,10 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False): vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html) if vcode: from .youku import youku_download_by_vid - return youku_download_by_vid(vcode, title=title, output_dir = output_dir, merge = merge, info_only = info_only) + if 'stream_id' in kwargs: + return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, stream_id=kwargs['stream_id']) + else: + return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only) iid = r1(r'iid\s*[:=]\s*(\d+)', html) if not iid: From 205cc3138fe54c09f552fbdc4bf719f799ba64bd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 4 Jan 2015 09:28:23 +0100 Subject: [PATCH 02/57] Disable special conversion for non-UTF encoding. - Windows users should take care of their code page themselves --- src/you_get/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 1a9fd524..3e2a2cfe 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -36,7 +36,8 @@ def tr(s): if default_encoding == 'utf-8': return s else: - return str(s.encode('utf-8'))[2:-1] + return s + #return str(s.encode('utf-8'))[2:-1] # DEPRECATED in favor of match1() def r1(pattern, text): From 1b55b01b047824312c2eba342eed47d1d0503a97 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 5 Jan 2015 01:02:41 +0100 Subject: [PATCH 03/57] Tudou: improve regex --- src/you_get/extractors/tudou.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index e7bc7e9f..f2cf3c82 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -26,7 +26,7 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = html = get_html('http://www.tudou.com/programs/view/%s/' % id) iid = r1(r'iid\s*[:=]\s*(\S+)', html) - title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html) + title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): @@ -37,7 +37,7 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa html = get_decoded_html(url) - title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html) + title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") assert title title = unescape_html(title) 
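Note on the regex change in PATCH 03/57 above: the old pattern [^\']+? stopped at the first quote inside the title, so a Tudou title containing an escaped \' was truncated; the new pattern captures everything up to the quote that closes the line and then unescapes \'. The snippet below is a minimal, self-contained sketch of that behaviour only — the r1 helper is a stand-in mirroring you-get's own, and the embedded page fragment is invented for illustration, not taken from a real Tudou page.

    import re

    def r1(pattern, text):
        # minimal stand-in for you_get.common.r1: first capture group or None
        m = re.search(pattern, text)
        if m:
            return m.group(1)

    # hypothetical page fragment; Tudou pages embed the title as  kw: '...'
    html = "var config = {\n    kw: 'It\\'s a demo title'   \n};"

    # old pattern: [^\']+? stops at the first (escaped) quote  -> It\
    old = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)

    # new pattern: capture up to the quote ending the line, then unescape \'
    new = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html)
    if new:
        new = new.replace("\\'", "'")   # -> It's a demo title

    print(old)
    print(new)
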
From d8c0c0594efc61aea7c15c935669275d27c333a6 Mon Sep 17 00:00:00 2001 From: pastebt Date: Mon, 5 Jan 2015 16:48:04 -0800 Subject: [PATCH 04/57] Support skipping udta box, and ignore samples > 1 modified: src/you_get/processor/join_mp4.py --- src/you_get/processor/join_mp4.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/you_get/processor/join_mp4.py b/src/you_get/processor/join_mp4.py index 24ba77f6..a47bfb6a 100755 --- a/src/you_get/processor/join_mp4.py +++ b/src/you_get/processor/join_mp4.py @@ -127,6 +127,16 @@ def read_raw(stream, size, left, type): body = stream.read(left) return Atom(type, size, body) +def read_udta(stream, size, left, type): + assert size == left + 8 + body = stream.read(left) + class Udta(Atom): + def write(self, stream): + return + def calsize(self): + return 0 + return Udta(type, size, body) + def read_body_stream(stream, left): body = stream.read(left) assert len(body) == left @@ -240,8 +250,9 @@ def read_hdlr(stream, size, left, type): qt_component_flags_mask = read_uint(stream) left -= 20 - track_name = stream.read(left - 1) - assert stream.read(1) == b'\x00' + track_name = stream.read(left) + #track_name = stream.read(left - 1) + #assert stream.read(1) == b'\x00' return Atom(b'hdlr', size, body) @@ -324,7 +335,7 @@ def read_stts(stream, size, left, type): left -= 4 entry_count = read_uint(stream) - assert entry_count == 1 + #assert entry_count == 1 left -= 4 samples = [] @@ -625,6 +636,7 @@ atom_readers = { b'pasp': read_raw, b'mdat': read_mdat, + b'udta': read_udta, } #stsd sample descriptions (codec types, initialization etc.) #stts (decoding) time-to-sample @@ -679,6 +691,7 @@ def parse_atoms(stream): return atoms def read_mp4(stream): + print(stream.name) atoms = parse_atoms(stream) moov = list(filter(lambda x: x.type == b'moov', atoms)) mdat = list(filter(lambda x: x.type == b'mdat', atoms)) @@ -695,7 +708,7 @@ def read_mp4(stream): def merge_stts(samples_list): sample_list = [] for samples in samples_list: - assert len(samples) == 1 + #assert len(samples) == 1 sample_list.append(samples[0]) counts, durations = zip(*sample_list) assert len(set(durations)) == 1, 'not all durations equal' From 7b0dc1d3302b27e04a22c60d7d9e046be32ad081 Mon Sep 17 00:00:00 2001 From: pastebt Date: Wed, 7 Jan 2015 17:00:49 -0800 Subject: [PATCH 05/57] Support multi stts and mdhd version = 1 modified: src/you_get/processor/join_mp4.py --- src/you_get/processor/join_mp4.py | 83 ++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/src/you_get/processor/join_mp4.py b/src/you_get/processor/join_mp4.py index a47bfb6a..8eca239c 100755 --- a/src/you_get/processor/join_mp4.py +++ b/src/you_get/processor/join_mp4.py @@ -24,6 +24,9 @@ def read_uint(stream): def write_uint(stream, n): stream.write(struct.pack('>I', n)) +def write_ulong(stream, n): + stream.write(struct.pack('>Q', n)) + def read_ushort(stream): return struct.unpack('>H', stream.read(2))[0] @@ -99,11 +102,16 @@ class VariableAtom(Atom): self.write1(stream) i = 0 n = 0 - for name, offset, value in self.variables: + for name, offset, value, bsize in self.variables: stream.write(self.body[i:offset]) - write_uint(stream, value) - n += offset - i + 4 - i = offset + 4 + if bsize == 4: + write_uint(stream, value) + elif bsize == 8: + write_ulong(stream, value) + else: + raise NotImplementedError() + n += offset - i + bsize + i = offset + bsize stream.write(self.body[i:]) n += len(self.body) - i assert n == len(self.body) @@ -117,7 +125,7 @@ 
class VariableAtom(Atom): for i in range(len(self.variables)): variable = self.variables[i] if variable[0] == k: - self.variables[i] = (k, variable[1], v) + self.variables[i] = (k, variable[1], v, variable[3]) break else: raise Exception('field not found: '+k) @@ -149,6 +157,12 @@ def read_full_atom(stream): assert version == 0 return value +def read_full_atom2(stream): + value = read_uint(stream) + version = value >> 24 + flags = value & 0xffffff + return version, value + def read_mvhd(stream, size, left, type): body, stream = read_body_stream(stream, left) value = read_full_atom(stream) @@ -182,7 +196,7 @@ def read_mvhd(stream, size, left, type): nextTrackID = read_uint(stream) left -= 80 assert left == 0 - return VariableAtom(b'mvhd', size, body, [('duration', 16, duration)]) + return VariableAtom(b'mvhd', size, body, [('duration', 16, duration, 4)]) def read_tkhd(stream, size, left, type): body, stream = read_body_stream(stream, left) @@ -217,26 +231,35 @@ def read_tkhd(stream, size, left, type): height = qt_track_height >> 16 left -= 60 assert left == 0 - return VariableAtom(b'tkhd', size, body, [('duration', 20, duration)]) + return VariableAtom(b'tkhd', size, body, [('duration', 20, duration, 4)]) def read_mdhd(stream, size, left, type): body, stream = read_body_stream(stream, left) - value = read_full_atom(stream) + ver, value = read_full_atom2(stream) left -= 4 - - # new Date(movieTime * 1000 - 2082850791998L); - creation_time = read_uint(stream) - modification_time = read_uint(stream) - time_scale = read_uint(stream) - duration = read_uint(stream) - left -= 16 + + if ver == 1: + creation_time = read_ulong(stream) + modification_time = read_ulong(stream) + time_scale = read_uint(stream) + duration = read_ulong(stream) + var = [('duration', 24, duration, 8)] + left -= 28 + else: + assert ver == 0, "ver=%d" % ver + creation_time = read_uint(stream) + modification_time = read_uint(stream) + time_scale = read_uint(stream) + duration = read_uint(stream) + var = [('duration', 16, duration, 4)] + left -= 16 packed_language = read_ushort(stream) qt_quality = read_ushort(stream) left -= 4 assert left == 0 - return VariableAtom(b'mdhd', size, body, [('duration', 16, duration)]) + return VariableAtom(b'mdhd', size, body, var) def read_hdlr(stream, size, left, type): body, stream = read_body_stream(stream, left) @@ -251,8 +274,7 @@ def read_hdlr(stream, size, left, type): left -= 20 track_name = stream.read(left) - #track_name = stream.read(left - 1) - #assert stream.read(1) == b'\x00' + #assert track_name[-1] == b'\x00' return Atom(b'hdlr', size, body) @@ -340,11 +362,11 @@ def read_stts(stream, size, left, type): samples = [] for i in range(entry_count): - sample_count = read_uint(stream) - sample_duration = read_uint(stream) - samples.append((sample_count, sample_duration)) - left -= 8 - + sample_count = read_uint(stream) + sample_duration = read_uint(stream) + samples.append((sample_count, sample_duration)) + left -= 8 + assert left == 0 #return Atom('stts', size, None) class stts_atom(Atom): @@ -358,9 +380,9 @@ def read_stts(stream, size, left, type): write_uint(stream, sample_count) write_uint(stream, sample_duration) def calsize(self): - oldsize = self.size # TODO: remove + #oldsize = self.size # TODO: remove self.size = 8 + 4 + 4 + len(self.body[1]) * 8 - assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove + #assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove return self.size return stts_atom(b'stts', size, 
(value, samples)) @@ -634,7 +656,7 @@ atom_readers = { b'free': read_raw, b'edts': read_raw, b'pasp': read_raw, - + b'mdat': read_mdat, b'udta': read_udta, } @@ -709,10 +731,13 @@ def merge_stts(samples_list): sample_list = [] for samples in samples_list: #assert len(samples) == 1 - sample_list.append(samples[0]) + #sample_list.append(samples[0]) + sample_list += samples counts, durations = zip(*sample_list) - assert len(set(durations)) == 1, 'not all durations equal' - return [(sum(counts), durations[0])] + #assert len(set(durations)) == 1, 'not all durations equal' + if len(set(durations)) == 1: + return [(sum(counts), durations[0])] + return sample_list def merge_stss(samples, sample_number_list): results = [] From de2dee7fe35b586a5957ff287415de9d7d6026de Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 20 Jan 2015 04:39:29 +0100 Subject: [PATCH 06/57] AcFun: fix #475 --- src/you_get/extractors/acfun.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index e00c1c52..e91ec784 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -54,7 +54,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only pass def acfun_download(url, output_dir = '.', merge = True, info_only = False): - assert re.match(r'http://[^\.]+.acfun.[^\.]+/v/ac(\d+)', url) + assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) html = get_html(url) title = r1(r'

([^<>]+)<', html) From 0fd7de823d5b870ff02b358879283c58d3673900 Mon Sep 17 00:00:00 2001 From: pastebt Date: Tue, 20 Jan 2015 17:05:38 -0800 Subject: [PATCH 07/57] fix typo --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 1869f955..6f8d7af3 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -150,7 +150,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False): bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'vid': - sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'ykid': youku_download_by_vid(id, title=title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'uid': From 0a38485b0db1030b93dcc2ce4ee5011e4945047e Mon Sep 17 00:00:00 2001 From: The Gitter Badger Date: Fri, 23 Jan 2015 15:56:44 +0000 Subject: [PATCH 08/57] Added Gitter badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 54e94ec8..93100b81 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # You-Get +[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) + [![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [You-Get](http://www.soimort.org/you-get) is a video downloader for [YouTube](http://www.youtube.com), [Youku](http://www.youku.com), [niconico](http://www.nicovideo.jp) and a few other sites. 
From b59a97a8eda73f6fa95aaf43ed6089321303dcf6 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Tue, 27 Jan 2015 23:41:17 +0800 Subject: [PATCH 09/57] introduce new api for acfun --- src/you_get/extractors/acfun.py | 99 ++++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 19 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index e91ec784..b87f8f3d 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -21,25 +21,87 @@ def get_srt_lock_json(id): url = 'http://comment.acfun.tv/%s_lock.json' % id return get_html(url) -def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): - info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid)) - sourceType = info['sourceType'] - sourceId = info['sourceId'] - # danmakuId = info['danmakuId'] - if sourceType == 'sina': - sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'youku': - youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'tudou': - tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'qq': - qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'letv': - letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) +# def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): +# info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid)) +# sourceType = info['sourceType'] +# sourceId = info['sourceId'] +# # danmakuId = info['danmakuId'] +# if sourceType == 'sina': +# sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) +# elif sourceType == 'youku': +# youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only) +# elif sourceType == 'tudou': +# tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) +# elif sourceType == 'qq': +# qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) +# elif sourceType == 'letv': +# letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) +# else: +# raise NotImplementedError(sourceType) + +# if not info_only: +# title = get_filename(title) +# try: +# print('Downloading %s ...\n' % (title + '.cmt.json')) +# cmt = get_srt_json(vid) +# with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: +# x.write(cmt) +# # print('Downloading %s ...\n' % (title + '.cmt_lock.json')) +# # cmt = get_srt_lock_json(danmakuId) +# # with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: +# # x.write(cmt) +# except: +# pass + + + +# decompile from player swf +# protected static const VIDEO_PARSE_API:String = "http://jiexi.acfun.info/index.php?vid="; +# protected static var VIDEO_RATES_CODE:Array = ["C40","C30","C20","C10"]; +# public static var VIDEO_RATES_STRING:Array = ["原画","超清","高清","流畅"]; + +# Sometimes may find C80 but size smaller than C30 +stream_types = ["C40","C30","C20","C10"] +stream_types_map = {"C10":"流畅","C20":"高清","C30":"超清","C40":"原画"} + +def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False ,**kwargs): + #api example http://jiexi.acfun.info/index.php?vid=1122870 + info = 
json.loads(get_content("http://jiexi.acfun.info/index.php?vid={}".format(vid))) + assert info["code"] == 200 + assert info["success"] == True + + support_types = sorted(info["result"].keys(),key= lambda i: int(i[1:])) + + stream_id = None + if "stream_id" in kwargs and kwargs["stream_id"] in support_types: + stream_id = kwargs["stream_id"] else: - raise NotImplementedError(sourceType) + print("Current Video Supports:") + for i in support_types: + if info["result"][i]["totalbytes"] != 0: + print("\t--foramt",i,":",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB") + else: + print("\t--foramt",i,":",info["result"][i]["quality"]) + #because C80 is not the best + if "C80" not in support_types: + stream_id = support_types[-1] + else: + stream_id = support_types[-2] + + urls = [None] * len(info["result"][stream_id]["files"]) + for i in info["result"][stream_id]["files"]: + urls[i["no"]] = i["url"] + ext = info["result"][stream_id]["files"][0]["type"] + size = 0 + for i in urls: + _, _, tmp =url_info(i) + size +=tmp + print_info(site_info, title, ext, size) + print("Format: ",stream_id) + print() if not info_only: + download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) title = get_filename(title) try: print('Downloading %s ...\n' % (title + '.cmt.json')) @@ -52,8 +114,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only # x.write(cmt) except: pass - -def acfun_download(url, output_dir = '.', merge = True, info_only = False): +def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) html = get_html(url) @@ -67,7 +128,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False): for video in videos: p_vid = video[0] p_title = title + " - " + video[1] - acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only) + acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only ,**kwargs) else: # Useless - to be removed? 
id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html) From c05843cb3a24de350e47ea67f63685c091bd18d4 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Tue, 27 Jan 2015 23:44:45 +0800 Subject: [PATCH 10/57] format code --- src/you_get/extractors/acfun.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index b87f8f3d..61da1a0b 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -59,10 +59,8 @@ def get_srt_lock_json(id): # protected static const VIDEO_PARSE_API:String = "http://jiexi.acfun.info/index.php?vid="; # protected static var VIDEO_RATES_CODE:Array = ["C40","C30","C20","C10"]; # public static var VIDEO_RATES_STRING:Array = ["原画","超清","高清","流畅"]; - # Sometimes may find C80 but size smaller than C30 -stream_types = ["C40","C30","C20","C10"] -stream_types_map = {"C10":"流畅","C20":"高清","C30":"超清","C40":"原画"} + def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False ,**kwargs): #api example http://jiexi.acfun.info/index.php?vid=1122870 @@ -108,12 +106,9 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only cmt = get_srt_json(vid) with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: x.write(cmt) - # print('Downloading %s ...\n' % (title + '.cmt_lock.json')) - # cmt = get_srt_lock_json(danmakuId) - # with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: - # x.write(cmt) except: pass + def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) html = get_html(url) From 74235551f8fd8d14f9c3192c0c1fcfb284a751b6 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Wed, 28 Jan 2015 22:15:47 +0800 Subject: [PATCH 11/57] fix iqiyi --- src/you_get/extractors/iqiyi.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index b7003842..a63c6b37 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -45,9 +45,19 @@ def getVrsEncodeCode(vlink): return loc2[::-1] def getVMS(tvid,vid,uid): - tm=randint(1000,2000) - vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=p'+"&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+"&enc="+hashlib.new('md5',bytes('ts56gh'+str(tm)+tvid,"utf-8")).hexdigest()+"&qyid="+uid+"&tn="+str(random()) - return json.loads(get_content(vmsreq)) + #tm ->the flash run time for md5 usage + #um -> vip 1 normal 0 + #authkey -> for password protected video ,replace '' with your password + #puid user.passportid may empty? 
+ #TODO: support password protected video + tm=randint(100,1000) + vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ + "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+\ + "&enc="+hashlib.new('md5',bytes('ts56gh'+str(tm)+tvid,"utf-8")).hexdigest()+\ + "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\ + "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() + tmp = get_content(vmsreq) + return json.loads(tmp) def getDispathKey(rid): tp=")(*&^flash@#$%a" #magic from swf @@ -67,9 +77,15 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): assert videoid info = getVMS(tvid,videoid,gen_uid) + assert info["code"] == "A000000" title = info["data"]["vi"]["vn"] + # data.vp = json.data.vp + # data.vi = json.data.vi + # data.f4v = json.data.f4v + # if movieIsMember data.vp = json.data.np + #for highest qualities #for http://www.iqiyi.com/v_19rrmmz5yw.html not vp -> np try: @@ -79,28 +95,24 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): log.e("[Error] Do not support for iQIYI VIP video.") exit(-1) - # assert info["data"]['vp']["tkl"]!='' bid=0 for i in info["data"]["vp"]["tkl"][0]["vs"]: if int(i["bid"])<=10 and int(i["bid"])>=bid: bid=int(i["bid"]) - video_links=i["fs"] - #todo support choose quality with cmdline + video_links=i["flvs"] #now in i["flvs"] not in i["fs"] urls=[] size=0 for i in video_links: vlink=i["l"] - # print(vlink) if not vlink.startswith("/"): #vlink is encode vlink=getVrsEncodeCode(vlink) - assert vlink.endswith(".f4v") - size+=i["b"] key=getDispathKey(vlink.split("/")[-1].split(".")[0]) + size+=i["b"] baseurl=info["data"]["vp"]["du"].split("/") baseurl.insert(-1,key) - url="/".join(baseurl)+vlink+'?su='+gen_uid+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000)) + url="/".join(baseurl)+vlink+'?su='+gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000)) urls.append(json.loads(get_content(url))["l"]) #download should be complete in 10 minutes From 3ce3e6adb3d47989338866da1b840b77e6ceab5a Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Wed, 28 Jan 2015 22:17:07 +0800 Subject: [PATCH 12/57] remove some debug code --- src/you_get/extractors/iqiyi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index a63c6b37..3b1b6582 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -56,8 +56,7 @@ def getVMS(tvid,vid,uid): "&enc="+hashlib.new('md5',bytes('ts56gh'+str(tm)+tvid,"utf-8")).hexdigest()+\ "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\ "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() - tmp = get_content(vmsreq) - return json.loads(tmp) + return json.loads(get_content(vmsreq)) def getDispathKey(rid): tp=")(*&^flash@#$%a" #magic from swf From a73db610ee53c7d020ab604394bd4c2847981088 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Wed, 28 Jan 2015 22:28:33 +0800 Subject: [PATCH 13/57] oops typo --- src/you_get/extractors/acfun.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 61da1a0b..d4ddf974 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -77,9 +77,9 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only print("Current Video Supports:") for i in support_types: if info["result"][i]["totalbytes"] != 0: - 
print("\t--foramt",i,":",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB") + print("\t--format",i,":",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB") else: - print("\t--foramt",i,":",info["result"][i]["quality"]) + print("\t--format",i,":",info["result"][i]["quality"]) #because C80 is not the best if "C80" not in support_types: stream_id = support_types[-1] From 14c2e68c39ef3b0cbe2fcc72da322943b819f6fc Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 29 Jan 2015 08:39:54 +0100 Subject: [PATCH 14/57] update README.md --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 93100b81..1ac7db40 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # You-Get -[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) - -[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) +[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [You-Get](http://www.soimort.org/you-get) is a video downloader for [YouTube](http://www.youtube.com), [Youku](http://www.youku.com), [niconico](http://www.nicovideo.jp) and a few other sites. From d1d45bee8abacd05f040e5eed796760b9901f329 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 30 Jan 2015 14:01:31 +0800 Subject: [PATCH 15/57] links may in fs or flvs,tmp fix,more research to be done --- src/you_get/extractors/iqiyi.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 3b1b6582..29bb378a 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -98,7 +98,13 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): for i in info["data"]["vp"]["tkl"][0]["vs"]: if int(i["bid"])<=10 and int(i["bid"])>=bid: bid=int(i["bid"]) - video_links=i["flvs"] #now in i["flvs"] not in i["fs"] + + video_links=i["fs"] #now in i["flvs"] not in i["fs"] + if not i["fs"][0]["l"].startswith("/"): + tmp = getVrsEncodeCode(i["fs"][0]["l"]) + if tmp.endswith('mp4'): + video_links = i["flvs"] + urls=[] size=0 @@ -113,7 +119,6 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): baseurl.insert(-1,key) url="/".join(baseurl)+vlink+'?su='+gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000)) urls.append(json.loads(get_content(url))["l"]) - #download should be complete in 10 minutes #because the url is generated before start downloading #and the key may be expired after 10 minutes From cb375b6cfb210e3282337102642b97cb8244e591 Mon Sep 17 00:00:00 2001 From: pastebt Date: Mon, 2 Feb 2015 19:16:20 -0800 Subject: [PATCH 16/57] fix bilibili.com MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sample: python3 you-get -i http://www.bilibili.com/video/av663903/ Before fix it return: Video Site: bilibili.com Title: 【美版无间道】无间道风云【小李子-马呆萌】【犯罪】[2006] Type: Flash video 
(video/x-flv) Size: 0.17 MiB (180635 Bytes) This is a error.mp4 file After fix it return: Video Site: bilibili.com Title: 【美版无间道】无间道风云【小李子-马呆萌】【犯罪】[2006] Type: Flash video (video/x-flv) Size: 1989.14 MiB (2085762683 Bytes) --- src/you_get/extractors/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 6f8d7af3..190d62c3 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -19,7 +19,8 @@ client = { 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)' + #'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)' + 'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36" } def get_srt_xml(id): From 1763986ca4992ef45f63ad56f68176ac6ddf0ec5 Mon Sep 17 00:00:00 2001 From: pastebt Date: Mon, 2 Feb 2015 20:14:41 -0800 Subject: [PATCH 17/57] fix bug #353 also may fix bug #456 Now the -y proxy can be used for tv.sohu.com such as http://tv.sohu.com/20140629/n401523369.shtml?txid=8254069965286abe9ee523a73c256ea7 --- src/you_get/extractors/sohu.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 6ee472e0..c0e46545 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -11,7 +11,7 @@ def real_url(host, prot, file, new): start, _, host, key = get_html(url).split('|')[:4] return '%s%s?key=%s' % (start[:-1], new, key) -def sohu_download(url, output_dir = '.', merge = True, info_only = False): +def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): if re.match(r'http://share.vrs.sohu.com', url): vid = r1('id=(\d+)', url) else: @@ -20,12 +20,16 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): assert vid if re.match(r'http://tv.sohu.com/', url): + if extractor_proxy: + set_proxy(tuple(extractor_proxy.split(":"))) data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: hqvid = data['data'][qtyp] if hqvid != 0 and hqvid != vid : data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) break + if extractor_proxy: + unset_proxy() host = data['allot'] prot = data['prot'] urls = [] From 6fba4bd9ce58e4897afd64a28307988638e05711 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Mon, 9 Feb 2015 01:07:40 +0800 Subject: [PATCH 18/57] fix letv --- src/you_get/extractors/letv.py | 103 ++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 35 deletions(-) diff --git a/src/you_get/extractors/letv.py b/src/you_get/extractors/letv.py index 2ce16a84..b9cdd35b 100644 --- a/src/you_get/extractors/letv.py +++ b/src/you_get/extractors/letv.py @@ -5,16 +5,17 @@ __all__ = ['letv_download', 'letvcloud_download', 'letvcloud_download_by_vu'] import json import random import xml.etree.ElementTree as ET -import base64, hashlib, urllib +import base64, hashlib, urllib, time, re from ..common import * +#@DEPRECATED def get_timestamp(): tn = random.random() url = 'http://api.letv.com/time?tn={}'.format(tn) result = get_content(url) return json.loads(result)['stime'] - +#@DEPRECATED def get_key(t): for s in range(0, 8): e = 1 & t @@ -23,46 +24,78 @@ def get_key(t): t += e return t ^ 
185025305 -def video_info(vid): - tn = get_timestamp() - key = get_key(tn) -#old api reserve for future use or for example - # url = 'http://api.letv.com/mms/out/video/play?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid, key) - # print(url) - # r = get_content(url, decoded=False) - # print(r) - # xml_obj = ET.fromstring(r) - # info = json.loads(xml_obj.find("playurl").text) - # title = info.get('title') - # urls = info.get('dispatch') - # for k in urls.keys(): - # url = urls[k][0] - # break - # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid={}'.format(k) - # return url, title +def calcTimeKey(t): + ror = lambda val, r_bits, : ((val & (2**32-1)) >> r_bits%32) | (val << (32-(r_bits%32)) & (2**32-1)) + return ror(ror(t,773625421%13)^773625421,773625421%17) - url="http://api.letv.com/mms/out/common/geturl?platid=3&splatid=301&playid=0&vtype=9,13,21,28&version=2.0&tss=no&vid={}&domain=www.letv.com&tkey={}".format(vid,key) + +def decode(data): + version = data[0:5] + if version.lower() == b'vc_01': + #get real m3u8 + loc2 = data[5:] + length = len(loc2) + loc4 = [0]*(2*length) + for i in range(length): + loc4[2*i] = loc2[i] >> 4 + loc4[2*i+1]= loc2[i] & 15; + loc6 = loc4[len(loc4)-11:]+loc4[:len(loc4)-11] + loc7 = [0]*length + for i in range(length): + loc7[i] = (loc6[2 * i] << 4) +loc6[2*i+1] + return ''.join([chr(i) for i in loc7]) + else: + # directly return + return data + + + + +def video_info(vid,**kwargs): + url = 'http://api.letv.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid,calcTimeKey(int(time.time()))) r = get_content(url, decoded=False) info=json.loads(str(r,"utf-8")) - size=0 - for i in info["data"][0]["infos"]: #0 means only one file not truncated.need to upgrade - if int(i["gsize"])>size: - size=int(i["gsize"]) - url=i["mainUrl"] - url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid=1300".format(random.random()) - # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid=1000' #{}'.format(k) + + stream_id = None + support_stream_id = info["playurl"]["dispatch"].keys() + if "stream_id" in kwargs and kwargs["stream_id"].lower() in support_stream_id: + stream_id = kwargs["stream_id"] + else: + print("Current Video Supports:") + for i in support_stream_id: + print("\t--format",i,"") + if "1080p" in support_stream_id: + stream_id = '1080p' + elif "720p" in support_stream_id: + stream_id = '720p' + else: + stream_id =sorted(support_stream_id,key= lambda i: int(i[1:]))[-1] + + url =info["playurl"]["domain"][0]+info["playurl"]["dispatch"][stream_id][0] + ext = info["playurl"]["dispatch"][stream_id][1].split('.')[-1] + url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid={}".format(random.random(),stream_id) + r2=get_content(url,decoded=False) info2=json.loads(str(r2,"utf-8")) - return info2["location"] -def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False): - url= video_info(vid) - _, _, size = url_info(url) - ext = 'flv' + # hold on ! 
more things to do + # to decode m3u8 (encoded) + m3u8 = get_content(info2["location"],decoded=False) + m3u8_list = decode(m3u8) + urls = re.findall(r'^[^#^][^\r]*',m3u8_list,re.MULTILINE) + return ext,urls + +def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False,**kwargs): + ext , urls = video_info(vid,**kwargs) + size = 0 + for i in urls: + _, _, tmp = url_info(i) + size += tmp + print_info(site_info, title, ext, size) if not info_only: - download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) + download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge) def letvcloud_download_by_vu(vu, title=None, output_dir='.', merge=True, info_only=False): str2Hash = 'cfflashformatjsonran0.7214574650861323uu2d8c027396ver2.1vu' + vu + 'bie^#@(%27eib58' @@ -90,7 +123,7 @@ def letvcloud_download(url, output_dir='.', merge=True, info_only=False): title = "LETV-%s" % vu letvcloud_download_by_vu(vu, title=title, output_dir=output_dir, merge=merge, info_only=info_only) -def letv_download(url, output_dir='.', merge=True, info_only=False): +def letv_download(url, output_dir='.', merge=True, info_only=False ,**kwargs): if re.match(r'http://yuntv.letv.com/', url): letvcloud_download(url, output_dir=output_dir, merge=merge, info_only=info_only) else: @@ -101,7 +134,7 @@ def letv_download(url, output_dir='.', merge=True, info_only=False): else: vid = match1(html, r'vid="(\d+)"') title = match1(html,r'name="irTitle" content="(.*?)"') - letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only,**kwargs) site_info = "LeTV.com" download = letv_download From 6de02c22b68c2eecc27a72d8c10bdc880786abd4 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Mon, 9 Feb 2015 10:31:13 +0800 Subject: [PATCH 19/57] fix typo in re pattern --- src/you_get/extractors/letv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/letv.py b/src/you_get/extractors/letv.py index b9cdd35b..08b68662 100644 --- a/src/you_get/extractors/letv.py +++ b/src/you_get/extractors/letv.py @@ -83,7 +83,7 @@ def video_info(vid,**kwargs): # to decode m3u8 (encoded) m3u8 = get_content(info2["location"],decoded=False) m3u8_list = decode(m3u8) - urls = re.findall(r'^[^#^][^\r]*',m3u8_list,re.MULTILINE) + urls = re.findall(r'^[^#][^\r]*',m3u8_list,re.MULTILINE) return ext,urls def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False,**kwargs): From e6581b99aa48bb7ed8bab3b8d2d23980a6674c77 Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Sat, 14 Feb 2015 18:54:43 +0800 Subject: [PATCH 20/57] escape filenames for ffmpeg concatenating --- src/you_get/processor/ffmpeg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 24439bc2..94378daa 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -100,7 +100,9 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): concat_list = open(output + '.txt', 'w', encoding="utf-8") for file in files: if os.path.isfile(file): - concat_list.write("file '%s'\n" % file) + # for escaping rules, see: + # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping + concat_list.write("file '%s'\n" % file.replace("'", r"'\''")) concat_list.close() params = [FFMPEG, '-f', 'concat', '-y', '-i'] From b61e995f9228ef629e51bd4be4225ea473699dcf Mon Sep 17 00:00:00 2001 
From: Eskibear Date: Mon, 9 Mar 2015 13:50:09 +0800 Subject: [PATCH 21/57] Xiami: update collect url; fix #502 --- src/you_get/extractors/xiami.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/xiami.py b/src/you_get/extractors/xiami.py index 143e6eb5..ca4d634f 100644 --- a/src/you_get/extractors/xiami.py +++ b/src/you_get/extractors/xiami.py @@ -142,8 +142,8 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info id = r1(r'http://www.xiami.com/album/(\d+)', url) xiami_download_album(id, output_dir, merge, info_only) - if re.match(r'http://www.xiami.com/song/showcollect/id/\d+', url): - id = r1(r'http://www.xiami.com/song/showcollect/id/(\d+)', url) + if re.match(r'http://www.xiami.com/collect/\d+', url): + id = r1(r'http://www.xiami.com/collect/(\d+)', url) xiami_download_showcollect(id, output_dir, merge, info_only) if re.match('http://www.xiami.com/song/\d+', url): From e384e45215fbe5a9784fc59bd61eb789be7eb335 Mon Sep 17 00:00:00 2001 From: Eskibear Date: Tue, 10 Mar 2015 19:00:20 +0800 Subject: [PATCH 22/57] robustness: skip unavailable tracks in Xiami showcollect downloading --- src/you_get/extractors/xiami.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/xiami.py b/src/you_get/extractors/xiami.py index ca4d634f..f6e141d9 100644 --- a/src/you_get/extractors/xiami.py +++ b/src/you_get/extractors/xiami.py @@ -78,10 +78,16 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = tracks = doc.getElementsByTagName("track") track_nr = 1 for i in tracks: - artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue - album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue - song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) + artist=album_name=song_title=url="" + try: + song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue + artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue + album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue + song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue + url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) + except: + log.e("Song %s failed. 
[Info Missing] artist:%s, album:%s, title:%s, url:%s" % (song_id, artist, album_name, song_title, url)) + continue try: lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue except: From 1fc754a94fce84a2154ca03cf950c284d234063f Mon Sep 17 00:00:00 2001 From: Eskibear Date: Wed, 11 Mar 2015 19:38:21 +0800 Subject: [PATCH 23/57] unify mp3 filename format --- src/you_get/extractors/xiami.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/xiami.py b/src/you_get/extractors/xiami.py index f6e141d9..4e0baec0 100644 --- a/src/you_get/extractors/xiami.py +++ b/src/you_get/extractors/xiami.py @@ -61,7 +61,7 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False): print_info(site_info, song_title, ext, size) if not info_only: - file_name = "%s - %s - %s" % (song_title, album_name, artist) + file_name = "%s - %s - %s" % (song_title, artist, album_name) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) try: xiami_download_lyric(lrc_url, file_name, output_dir) From 228bb26f5b5d1a356560f1a39a7ed8cc99bcd760 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Wed, 25 Mar 2015 11:29:33 +0800 Subject: [PATCH 24/57] support for lizhifm --- src/you_get/common.py | 3 ++- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/lizhi.py | 41 ++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 src/you_get/extractors/lizhi.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 3e2a2cfe..c14026dc 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -900,7 +900,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -945,6 +945,7 @@ def url_to_module(url): 'kugou': kugou, 'kuwo': kuwo, 'letv': letv, + 'lizhi':lizhi, 'magisto': magisto, 'miomio': miomio, 'mixcloud': mixcloud, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index da19036b..a5894d2d 100644 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -25,6 +25,7 @@ from .ku6 import * from .kugou import * from .kuwo import * from .letv import * +from .lizhi import * from .magisto import * from .miomio import * from .mixcloud import * diff --git a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py new file mode 100644 index 00000000..faeaa366 --- /dev/null +++ b/src/you_get/extractors/lizhi.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + 
+__all__ = ['lizhi_download'] +import json +from ..common import * + +def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False): + # like this http://www.lizhi.fm/#/31365/ + #api desc: s->start l->length band->some radio + #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365 + band_id = match1(url,r'#/(\d+)') + #try to get a considerable large l to reduce html parsing task. + api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id + content_json = json.loads(get_content(api_url)) + for sound in content_json: + title = sound["name"] + res_url = sound["url"] + songtype, ext, size = url_info(res_url,faker=True) + print_info(site_info, title, songtype, size) + if not info_only: + #no referer no speed! + download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) + pass + +def lizhi_download(url, output_dir = '.', merge = True, info_only = False): + # url like http://www.lizhi.fm/#/549759/18864883431656710 + api_id = match1(url,r'#/(\d+/\d+)') + api_url = 'http://www.lizhi.fm/api/audio/'+api_id + content_json = json.loads(get_content(api_url)) + title = content_json["audio"]["name"] + res_url = content_json["audio"]["url"] + songtype, ext, size = url_info(res_url,faker=True) + print_info(site_info, title, songtype, size) + if not info_only: + #no referer no speed! + download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) + + +site_info = "lizhi.fm" +download = lizhi_download +download_playlist = lizhi_download_playlist From e22fb5269d7c718d97bd7bd6486c5fd5e1e1e1b4 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 2 Apr 2015 10:39:39 +0800 Subject: [PATCH 25/57] update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1ac7db40..f89adeff 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ Others: * Kugou (酷狗音乐) * Kuwo (酷我音乐) * LeTV (乐视网) +* Lizhi.fm (荔枝FM) * MioMio * MTV 81 * NetEase (网易视频) From 90fb2f48425927f947e10a4e66903d4792ce1781 Mon Sep 17 00:00:00 2001 From: D Low Date: Sun, 12 Apr 2015 14:55:36 +0100 Subject: [PATCH 26/57] Fix facebook extractor Previously would throw error if there is no hd_src Modified to use json parser and check if hd_src is None --- src/you_get/extractors/facebook.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py index edbbb671..c0610a17 100644 --- a/src/you_get/extractors/facebook.py +++ b/src/you_get/extractors/facebook.py @@ -3,22 +3,26 @@ __all__ = ['facebook_download'] from ..common import * +import json -def facebook_download(url, output_dir = '.', merge = True, info_only = False): + +def facebook_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) - + title = r1(r'(.+) \| Facebook', html) - + s2 = parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html))) + data = json.loads(s2) + video_data = data["video_data"][0] for fmt in ["hd_src", "sd_src"]: - src= re.sub(r'\\/', r'/', r1(r'"' + fmt + '":"([^"]*)"', parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html))))) + src = video_data[fmt] if src: break - - type, ext, size = url_info(src) - + + type, ext, size = url_info(src, True) + print_info(site_info, title, type, size) if not info_only: - download_urls([src], title, ext, size, output_dir, merge = merge) + download_urls([src], title, ext, size, output_dir, merge=merge) site_info = "Facebook.com" 
download = facebook_download From acc5e90777c89c8d434a7deb22a569c8b3375918 Mon Sep 17 00:00:00 2001 From: D Low Date: Wed, 22 Apr 2015 21:59:03 +0100 Subject: [PATCH 27/57] Add extractor for vidto --- src/you_get/common.py | 3 ++- src/you_get/extractors/vidto.py | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/you_get/extractors/vidto.py diff --git a/src/you_get/common.py b/src/you_get/common.py index c14026dc..0a8e160c 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -900,7 +900,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -965,6 +965,7 @@ def url_to_module(url): 'tumblr': tumblr, 'vid48': vid48, 'videobam': videobam, + 'vidto': vidto, 'vimeo': vimeo, 'vine': vine, 'vk': vk, diff --git a/src/you_get/extractors/vidto.py b/src/you_get/extractors/vidto.py new file mode 100644 index 00000000..999c3aa6 --- /dev/null +++ b/src/you_get/extractors/vidto.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +__all__ = ['vidto_download'] + +from ..common import * +import pdb +import time + + +def vidto_download(url, output_dir='.', merge=True, info_only=False): + html = get_content(url) + params = {} + r = re.findall( + r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html) + for name, value in r: + params[name] = value + data = parse.urlencode(params).encode('utf-8') + req = request.Request(url) + print("Please wait for 6 seconds...") + time.sleep(6) + print("Starting") + new_html = request.urlopen(req, data).read().decode('utf-8', 'replace') + new_stff = re.search('lnk_download" href="(.*?)">', new_html) + if(new_stff): + url = new_stff.group(1) + title = params['fname'] + type = "" + ext = "" + a, b, size = url_info(url) + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge) + else: + print("cannot find link, please review") + pdb.set_trace() + + +site_info = "vidto.me" +download = vidto_download +download_playlist = playlist_not_supported('vidto') From 4a4b6ceac0fd0db0d023ccb001f4abc59bf765ab Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Sun, 3 May 2015 22:03:54 -0700 Subject: [PATCH 28/57] print urls one per line instead of as a list... when the --url option is specified. 
--- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 src/you_get/common.py diff --git a/src/you_get/common.py b/src/you_get/common.py old mode 100644 new mode 100755 index c14026dc..0d4641d3 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -478,7 +478,7 @@ class DummyProgressBar: def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False): assert urls if dry_run: - print('Real URLs:\n%s\n' % urls) + print('Real URLs:\n%s' % '\n'.join(urls)) return if player: From 87868db44408681409ca091be1029676fb200467 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Tue, 12 May 2015 22:58:17 +0800 Subject: [PATCH 29/57] iqiyi changes encrypt key --- src/you_get/extractors/iqiyi.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 29bb378a..bf9637ec 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -9,6 +9,15 @@ import json from math import floor import hashlib + +''' +Changelog: + +-> http://www.iqiyi.com/common/flashplayer/20150506/MainPlayer_5_2_21_c3_2_6_1.swf + In this version iqiyi player, it changes enc key from 'ts56gh' to 'aw6UWGtp' + +''' + ''' com.qiyi.player.core.model.def.DefinitonEnum bid meaning for quality @@ -24,6 +33,9 @@ bid meaning for quality ''' +ENC_KEY = 'aw6UWGtp' + + def getVRSXORCode(arg1,arg2): loc3=arg2 %3 if loc3 == 1: @@ -53,7 +65,7 @@ def getVMS(tvid,vid,uid): tm=randint(100,1000) vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+\ - "&enc="+hashlib.new('md5',bytes('ts56gh'+str(tm)+tvid,"utf-8")).hexdigest()+\ + "&enc="+hashlib.new('md5',bytes(ENC_KEY+str(tm)+tvid,"utf-8")).hexdigest()+\ "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\ "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() return json.loads(get_content(vmsreq)) @@ -76,6 +88,7 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): assert videoid info = getVMS(tvid,videoid,gen_uid) + print(info) assert info["code"] == "A000000" title = info["data"]["vi"]["vn"] From b85eed14616bcbb4c9081b42ad627e598ee5d928 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Tue, 12 May 2015 22:59:14 +0800 Subject: [PATCH 30/57] remove debug info --- src/you_get/extractors/iqiyi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index bf9637ec..97d99fb5 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -88,7 +88,6 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): assert videoid info = getVMS(tvid,videoid,gen_uid) - print(info) assert info["code"] == "A000000" title = info["data"]["vi"]["vn"] From 9e567e7954f988a641dc96627534f2cc92ba5efa Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Wed, 13 May 2015 23:44:19 +0800 Subject: [PATCH 31/57] bilibili title from h2 to h1 --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 190d62c3..5186a5c3 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -126,7 +126,7 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa def bilibili_download(url, output_dir='.', merge=True, info_only=False): html = 
get_html(url) - title = r1_of([r'',r']*>([^<>]+)

'], html) + title = r1_of([r'',r']*>([^<>]+)'], html) title = unescape_html(title) title = escape_file_path(title) From 130f2c9f44eccc08ba21816760148828ac8ae7dd Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 15 May 2015 16:09:52 +0800 Subject: [PATCH 32/57] again, iqiyi changes its key.consider to write a function to automatically extract this key --- src/you_get/extractors/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 97d99fb5..83948153 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -12,6 +12,9 @@ import hashlib ''' Changelog: +-> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf + In this version ,it changes enc key to 'Qakh4T0A' + consider to write a function to parse swf and extract this key automatically -> http://www.iqiyi.com/common/flashplayer/20150506/MainPlayer_5_2_21_c3_2_6_1.swf In this version iqiyi player, it changes enc key from 'ts56gh' to 'aw6UWGtp' @@ -33,7 +36,7 @@ bid meaning for quality ''' -ENC_KEY = 'aw6UWGtp' +ENC_KEY = 'Qakh4T0A' def getVRSXORCode(arg1,arg2): From 075bbc87aaa481add981fa1e2390363c34a92658 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 15 May 2015 17:17:28 +0800 Subject: [PATCH 33/57] now get enc_key from player's swf automatically. Magic!!! --- src/you_get/extractors/iqiyi.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 83948153..d17f677b 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -7,9 +7,9 @@ from uuid import uuid4 from random import random,randint import json from math import floor +from zlib import decompress import hashlib - ''' Changelog: -> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf @@ -36,7 +36,6 @@ bid meaning for quality ''' -ENC_KEY = 'Qakh4T0A' def getVRSXORCode(arg1,arg2): @@ -59,7 +58,7 @@ def getVrsEncodeCode(vlink): loc2+=chr(loc6) return loc2[::-1] -def getVMS(tvid,vid,uid): +def getVMS(tvid,vid,enc,uid): #tm ->the flash run time for md5 usage #um -> vip 1 normal 0 #authkey -> for password protected video ,replace '' with your password @@ -68,7 +67,7 @@ def getVMS(tvid,vid,uid): tm=randint(100,1000) vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+\ - "&enc="+hashlib.new('md5',bytes(ENC_KEY+str(tm)+tvid,"utf-8")).hexdigest()+\ + "&enc="+hashlib.new('md5',bytes(enc+str(tm)+tvid,"utf-8")).hexdigest()+\ "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\ "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() return json.loads(get_content(vmsreq)) @@ -79,18 +78,30 @@ def getDispathKey(rid): t=str(int(floor(int(time)/(10*60.0)))) return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest() +def getEncKey(swflink): + swfcontent = get_content(swflink,decoded=False) + swfraw = decompress(swfcontent[8:]) + key = r1(b'MixerRemote\x08(.+?)\$&vv',swfraw) + return key.decode('utf-8') + def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): gen_uid=uuid4().hex html = get_html(url) - + + swflink = r1(r'(http://.+?MainPlayer.+?swf)',html) + enc_key = getEncKey(swflink) + tvid = r1(r'data-player-tvid="([^"]+)"', html) videoid = r1(r'data-player-videoid="([^"]+)"', html) + + assert enc_key assert tvid assert videoid - info = getVMS(tvid,videoid,gen_uid) 
+ info = getVMS(tvid, videoid, enc_key, gen_uid) + assert info["code"] == "A000000" title = info["data"]["vi"]["vn"] From 705d96c43c9c357ea256cefd6688d29b2315809b Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Wed, 20 May 2015 15:30:06 +0800 Subject: [PATCH 34/57] fix sohu --- src/you_get/extractors/sohu.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index c0e46545..132f3ec4 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -5,11 +5,13 @@ __all__ = ['sohu_download'] from ..common import * import json +import time -def real_url(host, prot, file, new): - url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new) - start, _, host, key = get_html(url).split('|')[:4] - return '%s%s?key=%s' % (start[:-1], new, key) + + +def real_url(vid,new): + url = 'http://data.vod.itc.cn/cdnList?new='+new+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000)) + return json.loads(get_html(url))['url'] def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): if re.match(r'http://share.vrs.sohu.com', url): @@ -37,8 +39,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for file, new in zip(data['clipsURL'], data['su']): - urls.append(real_url(host, prot, file, new)) + for new in data['su']: + urls.append(real_url(hqvid, new)) assert data['clipsURL'][0].endswith('.mp4') else: From ce4f116a31e166a2de43fa1edb925e8c903a952e Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Thu, 21 May 2015 12:54:20 +0800 Subject: [PATCH 35/57] forget to fix another code branch --- src/you_get/extractors/sohu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 132f3ec4..0c9f8f49 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -52,8 +52,8 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for file, new in zip(data['clipsURL'], data['su']): - urls.append(real_url(host, prot, file, new)) + for new in data['su']: + urls.append(real_url(hqvid, new)) assert data['clipsURL'][0].endswith('.mp4') print_info(site_info, title, 'mp4', size) From 50954908aa71c0758e1a4530027c3cd93f3c5fc4 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Thu, 21 May 2015 13:01:44 +0800 Subject: [PATCH 36/57] copy-paste typo --- src/you_get/extractors/sohu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 0c9f8f49..abe41b9e 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -50,11 +50,11 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] + print(data) size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new in data['su']: - urls.append(real_url(hqvid, new)) - assert data['clipsURL'][0].endswith('.mp4') + urls.append(real_url(vid, new)) print_info(site_info, title, 'mp4', size) if not info_only: From 
0222cbb19febd68d9995768e2ab19673f0334553 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Thu, 21 May 2015 13:05:39 +0800 Subject: [PATCH 37/57] remove debug info --- src/you_get/extractors/sohu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index abe41b9e..fdc52f17 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -50,7 +50,6 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] - print(data) size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new in data['su']: From 4539fd161a6acd992a9c0ac7584a133a4e9bcde5 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Tue, 26 May 2015 22:59:19 +0800 Subject: [PATCH 38/57] fix bilibili video type check error --- src/you_get/extractors/bilibili.py | 32 ++++++++---------------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 5186a5c3..b4ea8035 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -79,23 +79,15 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) for i in parse_cid_playurl(get_content(url, headers=client))] - if re.search(r'\.(flv|hlv)\b', urls[0]): - type = 'flv' - elif re.search(r'/flv/', urls[0]): - type = 'flv' - elif re.search(r'/mp4/', urls[0]): - type = 'mp4' - else: - type = 'flv' - + type_ = '' size = 0 for url in urls: - _, _, temp = url_info(url) + _, type_, temp = url_info(url) size += temp - print_info(site_info, title, type, size) + print_info(site_info, title, type_, size) if not info_only: - download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge) + download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False): sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest() @@ -105,23 +97,15 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) for i in parse_cid_playurl(get_content(url, headers=client))] - if re.search(r'\.(flv|hlv)\b', urls[0]): - type = 'flv' - elif re.search(r'/flv/', urls[0]): - type = 'flv' - elif re.search(r'/mp4/', urls[0]): - type = 'mp4' - else: - type = 'flv' - + type_ = '' size = 0 for url in urls: - _, _, temp = url_info(url) + _, type_, temp = url_info(url) size += temp or 0 - print_info(site_info, title, type, size) + print_info(site_info, title, type_, size) if not info_only: - download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge) + download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) def bilibili_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) From c8a35458dd11f14b52e200d4c44c8bb428301956 Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Wed, 27 May 2015 21:44:28 +0800 Subject: [PATCH 39/57] netease: more url pattern http://music.163.com/#/song/30706076/ --- src/you_get/extractors/netease.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index 
a0a3824d..c1213139 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -12,6 +12,8 @@ import os def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False): rid = match1(url, r'id=(.*)') + if rid is None: + rid = match1(url, r'/(\d+)/?$') if "album" in url: j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) From 5ebb2e05fee794e0f35784d3c95a60d7d62d2e55 Mon Sep 17 00:00:00 2001 From: cnbeining Date: Wed, 27 May 2015 22:29:49 -0400 Subject: [PATCH 40/57] Fix Acfun, add uu to Letvcloud, update key(fix #530, #495,#525, #528) --- src/you_get/extractors/acfun.py | 144 ++++++++++++++++---------------- src/you_get/extractors/letv.py | 20 +++-- 2 files changed, 86 insertions(+), 78 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index d4ddf974..b06e23f4 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -21,37 +21,37 @@ def get_srt_lock_json(id): url = 'http://comment.acfun.tv/%s_lock.json' % id return get_html(url) -# def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): -# info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid)) -# sourceType = info['sourceType'] -# sourceId = info['sourceId'] -# # danmakuId = info['danmakuId'] -# if sourceType == 'sina': -# sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) -# elif sourceType == 'youku': -# youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only) -# elif sourceType == 'tudou': -# tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) -# elif sourceType == 'qq': -# qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) -# elif sourceType == 'letv': -# letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) -# else: -# raise NotImplementedError(sourceType) +def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): + info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid)) + sourceType = info['sourceType'] + sourceId = info['sourceId'] + # danmakuId = info['danmakuId'] + if sourceType == 'sina': + sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + elif sourceType == 'youku': + youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + elif sourceType == 'tudou': + tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + elif sourceType == 'qq': + qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + elif sourceType == 'letv': + letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only) + else: + raise NotImplementedError(sourceType) -# if not info_only: -# title = get_filename(title) -# try: -# print('Downloading %s ...\n' % (title + '.cmt.json')) -# cmt = get_srt_json(vid) -# with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: -# x.write(cmt) -# # print('Downloading %s ...\n' % (title + '.cmt_lock.json')) -# # cmt = get_srt_lock_json(danmakuId) -# # with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: -# # x.write(cmt) -# except: -# pass + if not 
info_only: + title = get_filename(title) + try: + print('Downloading %s ...\n' % (title + '.cmt.json')) + cmt = get_srt_json(vid) + with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: + x.write(cmt) + # print('Downloading %s ...\n' % (title + '.cmt_lock.json')) + # cmt = get_srt_lock_json(danmakuId) + # with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: + # x.write(cmt) + except: + pass @@ -62,52 +62,52 @@ def get_srt_lock_json(id): # Sometimes may find C80 but size smaller than C30 -def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False ,**kwargs): - #api example http://jiexi.acfun.info/index.php?vid=1122870 - info = json.loads(get_content("http://jiexi.acfun.info/index.php?vid={}".format(vid))) - assert info["code"] == 200 - assert info["success"] == True +#def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False ,**kwargs): + ###api example http://jiexi.acfun.info/index.php?vid=1122870 + #info = json.loads(get_content("http://jiexi.acfun.info/index.php?vid={}".format(vid))) + #assert info["code"] == 200 + #assert info["success"] == True - support_types = sorted(info["result"].keys(),key= lambda i: int(i[1:])) + #support_types = sorted(info["result"].keys(),key= lambda i: int(i[1:])) - stream_id = None - if "stream_id" in kwargs and kwargs["stream_id"] in support_types: - stream_id = kwargs["stream_id"] - else: - print("Current Video Supports:") - for i in support_types: - if info["result"][i]["totalbytes"] != 0: - print("\t--format",i,":",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB") - else: - print("\t--format",i,":",info["result"][i]["quality"]) - #because C80 is not the best - if "C80" not in support_types: - stream_id = support_types[-1] - else: - stream_id = support_types[-2] + #stream_id = None + #if "stream_id" in kwargs and kwargs["stream_id"] in support_types: + #stream_id = kwargs["stream_id"] + #else: + #print("Current Video Supports:") + #for i in support_types: + #if info["result"][i]["totalbytes"] != 0: + #print("\t--format",i,":",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB") + #else: + #print("\t--format",i,":",info["result"][i]["quality"]) + ##because C80 is not the best + #if "C80" not in support_types: + #stream_id = support_types[-1] + #else: + #stream_id = support_types[-2] - urls = [None] * len(info["result"][stream_id]["files"]) - for i in info["result"][stream_id]["files"]: - urls[i["no"]] = i["url"] - ext = info["result"][stream_id]["files"][0]["type"] - size = 0 - for i in urls: - _, _, tmp =url_info(i) - size +=tmp - print_info(site_info, title, ext, size) - print("Format: ",stream_id) - print() + #urls = [None] * len(info["result"][stream_id]["files"]) + #for i in info["result"][stream_id]["files"]: + #urls[i["no"]] = i["url"] + #ext = info["result"][stream_id]["files"][0]["type"] + #size = 0 + #for i in urls: + #_, _, tmp =url_info(i) + #size +=tmp + #print_info(site_info, title, ext, size) + #print("Format: ",stream_id) + #print() - if not info_only: - download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) - title = get_filename(title) - try: - print('Downloading %s ...\n' % (title + '.cmt.json')) - cmt = get_srt_json(vid) - with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: - x.write(cmt) - except: - pass + #if not info_only: + #download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) + 
#title = get_filename(title) + #try: + #print('Downloading %s ...\n' % (title + '.cmt.json')) + #cmt = get_srt_json(vid) + #with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: + #x.write(cmt) + #except: + #pass def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) diff --git a/src/you_get/extractors/letv.py b/src/you_get/extractors/letv.py index 08b68662..eaf92fbb 100644 --- a/src/you_get/extractors/letv.py +++ b/src/you_get/extractors/letv.py @@ -97,16 +97,20 @@ def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False, if not info_only: download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge) -def letvcloud_download_by_vu(vu, title=None, output_dir='.', merge=True, info_only=False): - str2Hash = 'cfflashformatjsonran0.7214574650861323uu2d8c027396ver2.1vu' + vu + 'bie^#@(%27eib58' +def letvcloud_download_by_vu(vu, uu, title=None, output_dir='.', merge=True, info_only=False): + #ran = float('0.' + str(random.randint(0, 9999999999999999))) # For ver 2.1 + #str2Hash = 'cfflashformatjsonran{ran}uu{uu}ver2.2vu{vu}bie^#@(%27eib58'.format(vu = vu, uu = uu, ran = ran) #Magic!/ In ver 2.1 + argumet_dict ={'cf' : 'flash', 'format': 'json', 'ran': str(int(time.time())), 'uu': str(uu),'ver': '2.2', 'vu': str(vu), } + sign_key = '2f9d6924b33a165a6d8b5d3d42f4f987' #ALL YOUR BASE ARE BELONG TO US + str2Hash = ''.join([i + argumet_dict[i] for i in sorted(argumet_dict)]) + sign_key sign = hashlib.md5(str2Hash.encode('utf-8')).hexdigest() - request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?&sign='+sign+'&cf=flash&vu='+vu+'&ver=2.1&ran=0.7214574650861323&qr=2&format=json&uu=2d8c027396') + request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?' 
+ '&'.join([i + '=' + argumet_dict[i] for i in argumet_dict]) + '&sign={sign}'.format(sign = sign)) response = urllib.request.urlopen(request_info) data = response.read() info = json.loads(data.decode('utf-8')) type_available = [] - for i in info['data']['video_info']['media']: - type_available.append({'video_url': info['data']['video_info']['media'][i]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][i]['play_url']['vtype'])}) + for video_type in info['data']['video_info']['media']: + type_available.append({'video_url': info['data']['video_info']['media'][video_type]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][video_type]['play_url']['vtype'])}) urls = [base64.b64decode(sorted(type_available, key = lambda x:x['video_quality'])[-1]['video_url']).decode("utf-8")] size = urls_size(urls) ext = 'mp4' @@ -118,10 +122,14 @@ def letvcloud_download(url, output_dir='.', merge=True, info_only=False): for i in url.split('&'): if 'vu=' in i: vu = i[3:] + if 'uu=' in i: + uu = i[3:] if len(vu) == 0: raise ValueError('Cannot get vu!') + if len(uu) == 0: + raise ValueError('Cannot get uu!') title = "LETV-%s" % vu - letvcloud_download_by_vu(vu, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only) def letv_download(url, output_dir='.', merge=True, info_only=False ,**kwargs): if re.match(r'http://yuntv.letv.com/', url): From 16e75a06b2b84fb9eef5b83fd72fdd9f0c3a04f0 Mon Sep 17 00:00:00 2001 From: Gilles Filippini Date: Mon, 1 Jun 2015 18:59:55 +0200 Subject: [PATCH 41/57] Fix case when hMusic data is None This occurs for example with these urls: http://music.163.com/song?id=17699228 http://music.163.com/song?id=17699232 --- src/you_get/extractors/netease.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index a0a3824d..7ef90899 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -48,7 +48,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals def netease_song_download(song, output_dir='.', info_only=False): title = "%s. 
%s" % (song['position'], song['name']) - if 'hMusic' in song: + if 'hMusic' in song and song['hMusic'] != None: url_best = make_url(song['hMusic']['dfsId']) elif 'mp3Url' in song: url_best = song['mp3Url'] From 4d0bb3d4fe41a24aad1dd3ddb9174c7d4aa648c1 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:19:38 +0800 Subject: [PATCH 42/57] sohu new api --- src/you_get/extractors/sohu.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index fdc52f17..c273f78f 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -6,11 +6,15 @@ from ..common import * import json import time +from random import random +from url.parse import urlparse +#http://115.25.217.132/?prot=9&prod=flash&pt=1& +#file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4 +#&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4 +#&key=3q6dEeDbCZwpf-kydU-7TH0YDP5UxFdU&vid=2406534&tvid=1663504&uid=13796019242829873083&sz=1583_434&md=WG4FExsQg2SW3C8BylUDISibt+AaBtYlyoHEkA==179&t=0.928698823787272 - - -def real_url(vid,new): - url = 'http://data.vod.itc.cn/cdnList?new='+new+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000)) +def real_url(host,vid,tvid,new,clipURL,ck): + url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random()) return json.loads(get_html(url))['url'] def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): @@ -37,11 +41,13 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] + tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new in data['su']: - urls.append(real_url(hqvid, new)) - assert data['clipsURL'][0].endswith('.mp4') + for new,clip,ck, in zip(data['su'],data['clipsURL']): + clipURL = urlparse(clip).path + urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) + # assert data['clipsURL'][0].endswith('.mp4') else: data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) @@ -50,10 +56,12 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = data['data'] title = data['tvName'] - size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) + tvid = data['tvid'] + size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new in data['su']: - urls.append(real_url(vid, new)) + for new,clip,ck, in zip(data['su'],data['clipsURL']): + clipURL = urlparse(clip).path + urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) print_info(site_info, title, 'mp4', size) if not info_only: From e26e149fb84ade180beb9accf0d4093e0c58ef93 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:24:20 +0800 Subject: [PATCH 43/57] oops --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index c273f78f..79770d28 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -7,7 +7,7 @@ from ..common import * import json import time from random import random -from url.parse import urlparse +from urllib.parse import urlparse 
#http://115.25.217.132/?prot=9&prod=flash&pt=1& #file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4 #&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4 From 2384e49fb25690e516f837c4fb770bfdbccf0199 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:28:08 +0800 Subject: [PATCH 44/57] obfused by arg name --- src/you_get/extractors/sohu.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 79770d28..cbbc4b62 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -28,20 +28,20 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac if re.match(r'http://tv.sohu.com/', url): if extractor_proxy: set_proxy(tuple(extractor_proxy.split(":"))) - data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: - hqvid = data['data'][qtyp] + hqvid = info['data'][qtyp] if hqvid != 0 and hqvid != vid : - data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) break if extractor_proxy: unset_proxy() - host = data['allot'] - prot = data['prot'] + host = info['allot'] + prot = info['prot'] + tvid = info['tvid'] urls = [] - data = data['data'] + data = info['data'] title = data['tvName'] - tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'],data['clipsURL']): @@ -50,11 +50,11 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac # assert data['clipsURL'][0].endswith('.mp4') else: - data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) - host = data['allot'] - prot = data['prot'] + info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) + host = info['allot'] + prot = info['prot'] urls = [] - data = data['data'] + data = info['data'] title = data['tvName'] tvid = data['tvid'] size = sum(data['clipsBytes']) From f629a20eb829405d8702768f2a033a1e7205c3a3 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:29:17 +0800 Subject: [PATCH 45/57] lack a arg --- src/you_get/extractors/sohu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index cbbc4b62..4d8db749 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -44,7 +44,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac title = data['tvName'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for new,clip,ck, in zip(data['su'],data['clipsURL']): + for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) # assert data['clipsURL'][0].endswith('.mp4') @@ -59,7 +59,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == 
len(data['clipsBytes']) == len(data['su']) - for new,clip,ck, in zip(data['su'],data['clipsURL']): + for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) From e7414d6f3808df0982ea490fd4a590dcf4a18f02 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:33:10 +0800 Subject: [PATCH 46/57] some mistake in my.tv.sohu.com --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 4d8db749..0bfea54d 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -53,10 +53,10 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) host = info['allot'] prot = info['prot'] + tvid = info['tvid'] urls = [] data = info['data'] title = data['tvName'] - tvid = data['tvid'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): From 62e0ec0219c5e12054f8b9edf6b796380ed0cb0d Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:37:58 +0800 Subject: [PATCH 47/57] some mistake2 in my.tv.sohu.com --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 0bfea54d..fbf04f6c 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -57,7 +57,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac urls = [] data = info['data'] title = data['tvName'] - size = sum(data['clipsBytes']) + size = sum(map(int,data['clipsBytes'])) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path From 3b97487505e9b26ba27a2afee446663ffb8059b8 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 12:38:56 +0800 Subject: [PATCH 48/57] some mistake3 in my.tv.sohu.com --- src/you_get/extractors/sohu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index fbf04f6c..96448e71 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -61,7 +61,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): clipURL = urlparse(clip).path - urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) + urls.append(real_url(host,vid,tvid,new,clipURL,ck)) print_info(site_info, title, 'mp4', size) if not info_only: From 412bfaf517663b326f44620eac0c31dce3f67c12 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 5 Jun 2015 14:21:30 +0800 Subject: [PATCH 49/57] add some coments --- src/you_get/extractors/sohu.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 96448e71..68e9b8ad 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -8,10 +8,12 @@ import json import time from random import random from urllib.parse import urlparse 
-#http://115.25.217.132/?prot=9&prod=flash&pt=1& -#file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4 -#&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4 -#&key=3q6dEeDbCZwpf-kydU-7TH0YDP5UxFdU&vid=2406534&tvid=1663504&uid=13796019242829873083&sz=1583_434&md=WG4FExsQg2SW3C8BylUDISibt+AaBtYlyoHEkA==179&t=0.928698823787272 + +''' +Changelog: + 1. http://tv.sohu.com/upload/swf/20150604/Main.swf + new api +''' def real_url(host,vid,tvid,new,clipURL,ck): url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random()) From 1e41302690ee83e848ded207a7c876878aafc351 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 10 Jun 2015 11:57:27 +0800 Subject: [PATCH 50/57] version 0.3.33 --- CHANGELOG.rst | 7 +++++++ src/you_get/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index df78e14c..4dc1f1f5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +0.3.33 +------ + +*Date: 2015-06-10* + +* Many bug fixes by our awesome contributors + 0.3.32 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 93aba3e8..f3e3d6a2 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.3.32' +__version__ = '0.3.33' From 6874ab35d2b18a1090ec120a8a5b8af4b393e931 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 10 Jun 2015 23:56:16 +0800 Subject: [PATCH 51/57] [cntv] fix #512 --- src/you_get/extractors/cntv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index 35945ffb..7abd3d41 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -28,7 +28,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) def cntv_download(url, output_dir = '.', merge = True, info_only = False): - if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url): + if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url): id = r1(r'(\w+)', get_html(url)) elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) From 8f7cad5ebabec92f9548a9ff86b03f870d8067be Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Sat, 13 Jun 2015 12:11:32 +0800 Subject: [PATCH 52/57] fix for iqiyi new encrypt method, for #542 --- src/you_get/extractors/iqiyi.py | 40 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index d17f677b..565aa789 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -12,6 +12,12 @@ import hashlib ''' Changelog: +-> http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf + In this version do not directly use enc key + gen enc key (so called sc ) in DMEmagelzzup.mix(tvid) -> (tm->getTimer(),src='hsalf',sc) + encrypy alogrithm is md5(DMEmagelzzup.mix.genInnerKey +tm+tvid) + how to gen genInnerKey ,can see first 3 lin in mix function in this file + -> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf In this version ,it 
changes enc key to 'Qakh4T0A' consider to write a function to parse swf and extract this key automatically @@ -35,8 +41,18 @@ bid meaning for quality ''' - - +def mix(tvid): + enc = [] + arr = [ -0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125, -0.609375, -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625, -0.625, -0.234375, -0.6171875, -0.234375, -0.5546875, -0.5625, -0.625, -0.59375, -0.2421875, -0.234375, -0.203125, -0.234375, -0.21875, -0.6171875, -0.6015625, -0.6015625, -0.2109375, -0.5703125, -0.2109375, -0.203125 ] [::-1] + for i in arr: + enc.append(chr(int(i *(1<<7)+(1<<7)))) + #enc -> fe7e331dbfba4089b1b0c0eba2fb0490 + tm = str(randint(100,1000)) + src = 'hsalf' + enc.append(str(tm)) + enc.append(tvid) + sc = hashlib.new('md5',bytes("".join(enc),'utf-8')).hexdigest() + return tm,sc,src def getVRSXORCode(arg1,arg2): loc3=arg2 %3 @@ -58,16 +74,16 @@ def getVrsEncodeCode(vlink): loc2+=chr(loc6) return loc2[::-1] -def getVMS(tvid,vid,enc,uid): +def getVMS(tvid,vid,uid): #tm ->the flash run time for md5 usage #um -> vip 1 normal 0 #authkey -> for password protected video ,replace '' with your password #puid user.passportid may empty? #TODO: support password protected video - tm=randint(100,1000) + tm,sc,src = mix(tvid) vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ - "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+\ - "&enc="+hashlib.new('md5',bytes(enc+str(tm)+tvid,"utf-8")).hexdigest()+\ + "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ + "&enc="+sc+\ "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\ "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() return json.loads(get_content(vmsreq)) @@ -78,29 +94,19 @@ def getDispathKey(rid): t=str(int(floor(int(time)/(10*60.0)))) return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest() -def getEncKey(swflink): - swfcontent = get_content(swflink,decoded=False) - swfraw = decompress(swfcontent[8:]) - key = r1(b'MixerRemote\x08(.+?)\$&vv',swfraw) - return key.decode('utf-8') - def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): gen_uid=uuid4().hex html = get_html(url) - swflink = r1(r'(http://.+?MainPlayer.+?swf)',html) - enc_key = getEncKey(swflink) - tvid = r1(r'data-player-tvid="([^"]+)"', html) videoid = r1(r'data-player-videoid="([^"]+)"', html) - assert enc_key assert tvid assert videoid - info = getVMS(tvid, videoid, enc_key, gen_uid) + info = getVMS(tvid, videoid, gen_uid) assert info["code"] == "A000000" From 6886c5ace1a7132093dcb2f2b5f25fab14d23037 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 13 Jun 2015 22:52:11 +0800 Subject: [PATCH 53/57] [vine] fix support --- src/you_get/extractors/vine.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractors/vine.py b/src/you_get/extractors/vine.py index 6f2d50aa..11ac09b8 100644 --- a/src/you_get/extractors/vine.py +++ b/src/you_get/extractors/vine.py @@ -7,18 +7,16 @@ from ..common import * def vine_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) - vid = r1(r'vine.co/v/([^/]+)/', html) + vid = r1(r'vine.co/v/([^/]+)', url) title1 = r1(r'', html) + mime, ext, size = url_info(stream) - print_info(site_info, title, type, size) + print_info(site_info, title, mime, size) if not info_only: - download_urls([url], title, ext, size, output_dir, merge = merge) + download_urls([stream], title, ext, size, output_dir, merge=merge) site_info = "Vine.co" download = vine_download From 
b375576d5e5afdab222562afde720b3a17f9c0be Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 14 Jun 2015 00:12:28 +0800 Subject: [PATCH 54/57] [instagram] fix support --- src/you_get/extractors/instagram.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 0605a6c3..472804f9 100644 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -7,15 +7,15 @@ from ..common import * def instagram_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - vid = r1(r'instagram.com/p/([^/]+)/', html) - description = r1(r' Date: Mon, 15 Jun 2015 00:04:57 +0800 Subject: [PATCH 55/57] [twitter] add support --- src/you_get/common.py | 3 ++- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/twitter.py | 27 +++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 src/you_get/extractors/twitter.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 4ebb255a..1349a28e 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -900,7 +900,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -963,6 +963,7 @@ def url_to_module(url): "tucao":tucao, 'tudou': tudou, 'tumblr': tumblr, + 'twitter': twitter, 'vid48': vid48, 'videobam': videobam, 'vidto': vidto, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index a5894d2d..9dcfdb30 100644 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -42,6 +42,7 @@ from .theplatform import * from .tucao import * from .tudou import * from .tumblr import * +from .twitter import * from .vid48 import * from .videobam import * from .vimeo import * diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py new file mode 100644 index 00000000..4c0546ae --- /dev/null +++ b/src/you_get/extractors/twitter.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__all__ = ['twitter_download'] + +from ..common import * + +def twitter_download(url, output_dir='.', merge=True, info_only=False): + html = get_html(url) + screen_name = r1(r'data-screen-name="([^"]*)"', html) + item_id = r1(r'data-item-id="([^"]*)"', html) + title = "{} [{}]".format(screen_name, item_id) + icards = r1(r'data-src="([^"]*)"', html) + if icards: + html = get_html("https://twitter.com" + icards) + data = 
json.loads(unescape_html(r1(r'data-player-config="([^"]*)"', html))) + source = data['playlist'][0]['source'] + else: + source = r1(r' Date: Mon, 15 Jun 2015 00:05:44 +0800 Subject: [PATCH 56/57] [README] update --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index f89adeff..2e96c296 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,6 @@ Fork me on GitHub: ### Supported Sites -First-class (better maintained): - * Dailymotion * Freesound * Google+ @@ -26,11 +24,10 @@ First-class (better maintained): * Niconico (ニコニコ動画) * Vimeo * Vine +* Twitter * Youku (优酷) * YouTube -Others: - * AcFun * Alive.in.th * Baidu Music (百度音乐) From 2afa854c66b4a7ab6224a287dcc4fc2ec0b48841 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 15 Jun 2015 00:18:43 +0800 Subject: [PATCH 57/57] [README] update --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 2e96c296..0e4fa5ad 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,6 @@ Fork me on GitHub: * Twitter * Youku (优酷) * YouTube - * AcFun * Alive.in.th * Baidu Music (百度音乐)
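
For reference, a minimal usage sketch of the twitter extractor introduced in [PATCH 55/57], assuming a you-get checkout containing this series is on the import path; the status URL below is a placeholder for illustration, not one taken from the patches.

    #!/usr/bin/env python
    # Usage sketch only: exercises twitter_download() as added in
    # src/you_get/extractors/twitter.py by this series.
    # The tweet URL is a made-up placeholder, not from the patches.
    from you_get.extractors.twitter import twitter_download

    # info_only=True resolves the stream and prints site/title/size via
    # print_info() without calling download_urls(), matching the flow in
    # twitter.py above.
    twitter_download('https://twitter.com/example_user/status/1234567890',
                     output_dir='.', merge=True, info_only=True)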