diff --git a/CHANGELOG.rst b/CHANGELOG.rst index df78e14c..4dc1f1f5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +0.3.33 +------ + +*Date: 2015-06-10* + +* Many bug fixes by our awesome contributors + 0.3.32 ------ diff --git a/README.md b/README.md index 54e94ec8..0e4fa5ad 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # You-Get -[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) +[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [You-Get](http://www.soimort.org/you-get) is a video downloader for [YouTube](http://www.youtube.com), [Youku](http://www.youku.com), [niconico](http://www.nicovideo.jp) and a few other sites. @@ -14,8 +14,6 @@ Fork me on GitHub: ### Supported Sites -First-class (better maintained): - * Dailymotion * Freesound * Google+ @@ -26,11 +24,9 @@ First-class (better maintained): * Niconico (ニコニコ動画) * Vimeo * Vine +* Twitter * Youku (优酷) * YouTube - -Others: - * AcFun * Alive.in.th * Baidu Music (百度音乐) @@ -56,6 +52,7 @@ Others: * Kugou (酷狗音乐) * Kuwo (酷我音乐) * LeTV (乐视网) +* Lizhi.fm (荔枝FM) * MioMio * MTV 81 * NetEase (网易视频) diff --git a/src/you_get/common.py b/src/you_get/common.py old mode 100644 new mode 100755 index 1a9fd524..1349a28e --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -36,7 +36,8 @@ def tr(s): if default_encoding == 'utf-8': return s else: - return str(s.encode('utf-8'))[2:-1] + return s + #return str(s.encode('utf-8'))[2:-1] # DEPRECATED in favor of match1() def r1(pattern, text): @@ -477,7 +478,7 @@ class DummyProgressBar: def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False): assert urls if dry_run: - print('Real URLs:\n%s\n' % urls) + print('Real URLs:\n%s' % '\n'.join(urls)) return if player: @@ -899,7 +900,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -944,6 +945,7 @@ def url_to_module(url): 'kugou': kugou, 'kuwo': kuwo, 'letv': letv, + 'lizhi':lizhi, 'magisto': magisto, 'miomio': miomio, 'mixcloud': mixcloud, @@ -961,8 +963,10 @@ def url_to_module(url): "tucao":tucao, 'tudou': tudou, 'tumblr': tumblr, + 'twitter': twitter, 'vid48': vid48, 'videobam': videobam, + 'vidto': vidto, 'vimeo': vimeo, 'vine': vine, 'vk': vk, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index da19036b..9dcfdb30 100644 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -25,6 +25,7 @@ from .ku6 import * from .kugou import * from .kuwo import * from .letv import * +from .lizhi import * from .magisto import * from .miomio import * from .mixcloud import * @@ -41,6 +42,7 @@ from .theplatform import * from .tucao import * from .tudou import * from .tumblr import * +from .twitter import * from .vid48 import * from .videobam import * from .vimeo import * diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index e00c1c52..b06e23f4 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -35,7 +35,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only elif sourceType == 'qq': qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'letv': - letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only) else: raise NotImplementedError(sourceType) @@ -53,8 +53,64 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only except: pass -def acfun_download(url, output_dir = '.', merge = True, info_only = False): - assert re.match(r'http://[^\.]+.acfun.[^\.]+/v/ac(\d+)', url) + + +# decompile from player swf +# protected static const VIDEO_PARSE_API:String = "http://jiexi.acfun.info/index.php?vid="; +# protected static var VIDEO_RATES_CODE:Array = ["C40","C30","C20","C10"]; +# public static var VIDEO_RATES_STRING:Array = ["原画","超清","高清","流畅"]; +# Sometimes may find C80 but size smaller than C30 + + +#def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False ,**kwargs): + ###api example http://jiexi.acfun.info/index.php?vid=1122870 + #info = json.loads(get_content("http://jiexi.acfun.info/index.php?vid={}".format(vid))) + #assert info["code"] == 200 + #assert info["success"] == True + + #support_types = sorted(info["result"].keys(),key= lambda i: int(i[1:])) + + #stream_id = None + #if "stream_id" in kwargs and kwargs["stream_id"] in support_types: + #stream_id = kwargs["stream_id"] + #else: + #print("Current Video Supports:") + #for i in support_types: + #if info["result"][i]["totalbytes"] != 0: + #print("\t--format",i,":",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB") + #else: + #print("\t--format",i,":",info["result"][i]["quality"]) + ##because C80 is not the best + #if "C80" not in support_types: + #stream_id = support_types[-1] + #else: + #stream_id = support_types[-2] + + #urls = [None] * len(info["result"][stream_id]["files"]) + #for i in info["result"][stream_id]["files"]: + #urls[i["no"]] = i["url"] + #ext = info["result"][stream_id]["files"][0]["type"] + #size = 0 + #for i in urls: + #_, _, tmp =url_info(i) + #size +=tmp + #print_info(site_info, title, ext, size) + #print("Format: ",stream_id) + #print() + + #if not info_only: + #download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) + #title = get_filename(title) + #try: + #print('Downloading %s ...\n' % (title + '.cmt.json')) + #cmt = get_srt_json(vid) + #with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: + #x.write(cmt) + #except: + #pass + +def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): + assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) html = get_html(url) title = r1(r'

([^<>]+)<', html) @@ -67,7 +123,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False): for video in videos: p_vid = video[0] p_title = title + " - " + video[1] - acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only) + acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only ,**kwargs) else: # Useless - to be removed? id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 1869f955..b4ea8035 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -19,7 +19,8 @@ client = { 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)' + #'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)' + 'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36" } def get_srt_xml(id): @@ -78,23 +79,15 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) for i in parse_cid_playurl(get_content(url, headers=client))] - if re.search(r'\.(flv|hlv)\b', urls[0]): - type = 'flv' - elif re.search(r'/flv/', urls[0]): - type = 'flv' - elif re.search(r'/mp4/', urls[0]): - type = 'mp4' - else: - type = 'flv' - + type_ = '' size = 0 for url in urls: - _, _, temp = url_info(url) + _, type_, temp = url_info(url) size += temp - print_info(site_info, title, type, size) + print_info(site_info, title, type_, size) if not info_only: - download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge) + download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False): sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest() @@ -104,28 +97,20 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) for i in parse_cid_playurl(get_content(url, headers=client))] - if re.search(r'\.(flv|hlv)\b', urls[0]): - type = 'flv' - elif re.search(r'/flv/', urls[0]): - type = 'flv' - elif re.search(r'/mp4/', urls[0]): - type = 'mp4' - else: - type = 'flv' - + type_ = '' size = 0 for url in urls: - _, _, temp = url_info(url) + _, type_, temp = url_info(url) size += temp or 0 - print_info(site_info, title, type, size) + print_info(site_info, title, type_, size) if not info_only: - download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge) + download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) def bilibili_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) - title = r1_of([r'',r']*>([^<>]+)

'], html) + title = r1_of([r'',r']*>([^<>]+)'], html) title = unescape_html(title) title = escape_file_path(title) @@ -150,7 +135,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False): bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'vid': - sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'ykid': youku_download_by_vid(id, title=title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'uid': diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index 35945ffb..7abd3d41 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -28,7 +28,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) def cntv_download(url, output_dir = '.', merge = True, info_only = False): - if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url): + if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url): id = r1(r'(\w+)', get_html(url)) elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py index edbbb671..c0610a17 100644 --- a/src/you_get/extractors/facebook.py +++ b/src/you_get/extractors/facebook.py @@ -3,22 +3,26 @@ __all__ = ['facebook_download'] from ..common import * +import json -def facebook_download(url, output_dir = '.', merge = True, info_only = False): + +def facebook_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) - + title = r1(r'(.+) \| Facebook', html) - + s2 = parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html))) + data = json.loads(s2) + video_data = data["video_data"][0] for fmt in ["hd_src", "sd_src"]: - src= re.sub(r'\\/', r'/', r1(r'"' + fmt + '":"([^"]*)"', parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html))))) + src = video_data[fmt] if src: break - - type, ext, size = url_info(src) - + + type, ext, size = url_info(src, True) + print_info(site_info, title, type, size) if not info_only: - download_urls([src], title, ext, size, output_dir, merge = merge) + download_urls([src], title, ext, size, output_dir, merge=merge) site_info = "Facebook.com" download = facebook_download diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 0605a6c3..472804f9 100644 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -7,15 +7,15 @@ from ..common import * def instagram_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - vid = r1(r'instagram.com/p/([^/]+)/', html) - description = r1(r' http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf + In this version do not directly use enc key + gen enc key (so called sc ) in DMEmagelzzup.mix(tvid) -> (tm->getTimer(),src='hsalf',sc) + encrypy alogrithm is md5(DMEmagelzzup.mix.genInnerKey +tm+tvid) + how to gen genInnerKey ,can see first 3 lin in mix function in this file + +-> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf + In this version ,it changes enc key to 'Qakh4T0A' + consider to write a function to parse swf and extract this key automatically + +-> http://www.iqiyi.com/common/flashplayer/20150506/MainPlayer_5_2_21_c3_2_6_1.swf + In this version iqiyi player, it changes enc key from 'ts56gh' to 'aw6UWGtp' + +''' + ''' com.qiyi.player.core.model.def.DefinitonEnum bid meaning for quality @@ -23,6 +41,18 @@ bid meaning for quality ''' +def mix(tvid): + enc = [] + arr = [ -0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125, -0.609375, -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625, -0.625, -0.234375, -0.6171875, -0.234375, -0.5546875, -0.5625, -0.625, -0.59375, -0.2421875, -0.234375, -0.203125, -0.234375, -0.21875, -0.6171875, -0.6015625, -0.6015625, -0.2109375, -0.5703125, -0.2109375, -0.203125 ] [::-1] + for i in arr: + enc.append(chr(int(i *(1<<7)+(1<<7)))) + #enc -> fe7e331dbfba4089b1b0c0eba2fb0490 + tm = str(randint(100,1000)) + src = 'hsalf' + enc.append(str(tm)) + enc.append(tvid) + sc = hashlib.new('md5',bytes("".join(enc),'utf-8')).hexdigest() + return tm,sc,src def getVRSXORCode(arg1,arg2): loc3=arg2 %3 @@ -45,8 +75,17 @@ def getVrsEncodeCode(vlink): return loc2[::-1] def getVMS(tvid,vid,uid): - tm=randint(1000,2000) - vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=p'+"&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+"&enc="+hashlib.new('md5',bytes('ts56gh'+str(tm)+tvid,"utf-8")).hexdigest()+"&qyid="+uid+"&tn="+str(random()) + #tm ->the flash run time for md5 usage + #um -> vip 1 normal 0 + #authkey -> for password protected video ,replace '' with your password + #puid user.passportid may empty? + #TODO: support password protected video + tm,sc,src = mix(tvid) + vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ + "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ + "&enc="+sc+\ + "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\ + "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() return json.loads(get_content(vmsreq)) def getDispathKey(rid): @@ -60,16 +99,24 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): gen_uid=uuid4().hex html = get_html(url) - + tvid = r1(r'data-player-tvid="([^"]+)"', html) videoid = r1(r'data-player-videoid="([^"]+)"', html) + assert tvid assert videoid - info = getVMS(tvid,videoid,gen_uid) + info = getVMS(tvid, videoid, gen_uid) + + assert info["code"] == "A000000" title = info["data"]["vi"]["vn"] + # data.vp = json.data.vp + # data.vi = json.data.vi + # data.f4v = json.data.f4v + # if movieIsMember data.vp = json.data.np + #for highest qualities #for http://www.iqiyi.com/v_19rrmmz5yw.html not vp -> np try: @@ -79,30 +126,31 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): log.e("[Error] Do not support for iQIYI VIP video.") exit(-1) - # assert info["data"]['vp']["tkl"]!='' bid=0 for i in info["data"]["vp"]["tkl"][0]["vs"]: if int(i["bid"])<=10 and int(i["bid"])>=bid: bid=int(i["bid"]) - video_links=i["fs"] - #todo support choose quality with cmdline + + video_links=i["fs"] #now in i["flvs"] not in i["fs"] + if not i["fs"][0]["l"].startswith("/"): + tmp = getVrsEncodeCode(i["fs"][0]["l"]) + if tmp.endswith('mp4'): + video_links = i["flvs"] + urls=[] size=0 for i in video_links: vlink=i["l"] - # print(vlink) if not vlink.startswith("/"): #vlink is encode vlink=getVrsEncodeCode(vlink) - assert vlink.endswith(".f4v") - size+=i["b"] key=getDispathKey(vlink.split("/")[-1].split(".")[0]) + size+=i["b"] baseurl=info["data"]["vp"]["du"].split("/") baseurl.insert(-1,key) - url="/".join(baseurl)+vlink+'?su='+gen_uid+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000)) + url="/".join(baseurl)+vlink+'?su='+gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000)) urls.append(json.loads(get_content(url))["l"]) - #download should be complete in 10 minutes #because the url is generated before start downloading #and the key may be expired after 10 minutes diff --git a/src/you_get/extractors/letv.py b/src/you_get/extractors/letv.py index 2ce16a84..eaf92fbb 100644 --- a/src/you_get/extractors/letv.py +++ b/src/you_get/extractors/letv.py @@ -5,16 +5,17 @@ __all__ = ['letv_download', 'letvcloud_download', 'letvcloud_download_by_vu'] import json import random import xml.etree.ElementTree as ET -import base64, hashlib, urllib +import base64, hashlib, urllib, time, re from ..common import * +#@DEPRECATED def get_timestamp(): tn = random.random() url = 'http://api.letv.com/time?tn={}'.format(tn) result = get_content(url) return json.loads(result)['stime'] - +#@DEPRECATED def get_key(t): for s in range(0, 8): e = 1 & t @@ -23,57 +24,93 @@ def get_key(t): t += e return t ^ 185025305 -def video_info(vid): - tn = get_timestamp() - key = get_key(tn) -#old api reserve for future use or for example - # url = 'http://api.letv.com/mms/out/video/play?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid, key) - # print(url) - # r = get_content(url, decoded=False) - # print(r) - # xml_obj = ET.fromstring(r) - # info = json.loads(xml_obj.find("playurl").text) - # title = info.get('title') - # urls = info.get('dispatch') - # for k in urls.keys(): - # url = urls[k][0] - # break - # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid={}'.format(k) - # return url, title +def calcTimeKey(t): + ror = lambda val, r_bits, : ((val & (2**32-1)) >> r_bits%32) | (val << (32-(r_bits%32)) & (2**32-1)) + return ror(ror(t,773625421%13)^773625421,773625421%17) - url="http://api.letv.com/mms/out/common/geturl?platid=3&splatid=301&playid=0&vtype=9,13,21,28&version=2.0&tss=no&vid={}&domain=www.letv.com&tkey={}".format(vid,key) + +def decode(data): + version = data[0:5] + if version.lower() == b'vc_01': + #get real m3u8 + loc2 = data[5:] + length = len(loc2) + loc4 = [0]*(2*length) + for i in range(length): + loc4[2*i] = loc2[i] >> 4 + loc4[2*i+1]= loc2[i] & 15; + loc6 = loc4[len(loc4)-11:]+loc4[:len(loc4)-11] + loc7 = [0]*length + for i in range(length): + loc7[i] = (loc6[2 * i] << 4) +loc6[2*i+1] + return ''.join([chr(i) for i in loc7]) + else: + # directly return + return data + + + + +def video_info(vid,**kwargs): + url = 'http://api.letv.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid,calcTimeKey(int(time.time()))) r = get_content(url, decoded=False) info=json.loads(str(r,"utf-8")) - size=0 - for i in info["data"][0]["infos"]: #0 means only one file not truncated.need to upgrade - if int(i["gsize"])>size: - size=int(i["gsize"]) - url=i["mainUrl"] - url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid=1300".format(random.random()) - # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid=1000' #{}'.format(k) + + stream_id = None + support_stream_id = info["playurl"]["dispatch"].keys() + if "stream_id" in kwargs and kwargs["stream_id"].lower() in support_stream_id: + stream_id = kwargs["stream_id"] + else: + print("Current Video Supports:") + for i in support_stream_id: + print("\t--format",i,"") + if "1080p" in support_stream_id: + stream_id = '1080p' + elif "720p" in support_stream_id: + stream_id = '720p' + else: + stream_id =sorted(support_stream_id,key= lambda i: int(i[1:]))[-1] + + url =info["playurl"]["domain"][0]+info["playurl"]["dispatch"][stream_id][0] + ext = info["playurl"]["dispatch"][stream_id][1].split('.')[-1] + url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid={}".format(random.random(),stream_id) + r2=get_content(url,decoded=False) info2=json.loads(str(r2,"utf-8")) - return info2["location"] -def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False): - url= video_info(vid) - _, _, size = url_info(url) - ext = 'flv' + # hold on ! more things to do + # to decode m3u8 (encoded) + m3u8 = get_content(info2["location"],decoded=False) + m3u8_list = decode(m3u8) + urls = re.findall(r'^[^#][^\r]*',m3u8_list,re.MULTILINE) + return ext,urls + +def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False,**kwargs): + ext , urls = video_info(vid,**kwargs) + size = 0 + for i in urls: + _, _, tmp = url_info(i) + size += tmp + print_info(site_info, title, ext, size) if not info_only: - download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) + download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge) -def letvcloud_download_by_vu(vu, title=None, output_dir='.', merge=True, info_only=False): - str2Hash = 'cfflashformatjsonran0.7214574650861323uu2d8c027396ver2.1vu' + vu + 'bie^#@(%27eib58' +def letvcloud_download_by_vu(vu, uu, title=None, output_dir='.', merge=True, info_only=False): + #ran = float('0.' + str(random.randint(0, 9999999999999999))) # For ver 2.1 + #str2Hash = 'cfflashformatjsonran{ran}uu{uu}ver2.2vu{vu}bie^#@(%27eib58'.format(vu = vu, uu = uu, ran = ran) #Magic!/ In ver 2.1 + argumet_dict ={'cf' : 'flash', 'format': 'json', 'ran': str(int(time.time())), 'uu': str(uu),'ver': '2.2', 'vu': str(vu), } + sign_key = '2f9d6924b33a165a6d8b5d3d42f4f987' #ALL YOUR BASE ARE BELONG TO US + str2Hash = ''.join([i + argumet_dict[i] for i in sorted(argumet_dict)]) + sign_key sign = hashlib.md5(str2Hash.encode('utf-8')).hexdigest() - request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?&sign='+sign+'&cf=flash&vu='+vu+'&ver=2.1&ran=0.7214574650861323&qr=2&format=json&uu=2d8c027396') + request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?' + '&'.join([i + '=' + argumet_dict[i] for i in argumet_dict]) + '&sign={sign}'.format(sign = sign)) response = urllib.request.urlopen(request_info) data = response.read() info = json.loads(data.decode('utf-8')) type_available = [] - for i in info['data']['video_info']['media']: - type_available.append({'video_url': info['data']['video_info']['media'][i]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][i]['play_url']['vtype'])}) + for video_type in info['data']['video_info']['media']: + type_available.append({'video_url': info['data']['video_info']['media'][video_type]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][video_type]['play_url']['vtype'])}) urls = [base64.b64decode(sorted(type_available, key = lambda x:x['video_quality'])[-1]['video_url']).decode("utf-8")] size = urls_size(urls) ext = 'mp4' @@ -85,12 +122,16 @@ def letvcloud_download(url, output_dir='.', merge=True, info_only=False): for i in url.split('&'): if 'vu=' in i: vu = i[3:] + if 'uu=' in i: + uu = i[3:] if len(vu) == 0: raise ValueError('Cannot get vu!') + if len(uu) == 0: + raise ValueError('Cannot get uu!') title = "LETV-%s" % vu - letvcloud_download_by_vu(vu, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only) -def letv_download(url, output_dir='.', merge=True, info_only=False): +def letv_download(url, output_dir='.', merge=True, info_only=False ,**kwargs): if re.match(r'http://yuntv.letv.com/', url): letvcloud_download(url, output_dir=output_dir, merge=merge, info_only=info_only) else: @@ -101,7 +142,7 @@ def letv_download(url, output_dir='.', merge=True, info_only=False): else: vid = match1(html, r'vid="(\d+)"') title = match1(html,r'name="irTitle" content="(.*?)"') - letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only,**kwargs) site_info = "LeTV.com" download = letv_download diff --git a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py new file mode 100644 index 00000000..faeaa366 --- /dev/null +++ b/src/you_get/extractors/lizhi.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +__all__ = ['lizhi_download'] +import json +from ..common import * + +def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False): + # like this http://www.lizhi.fm/#/31365/ + #api desc: s->start l->length band->some radio + #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365 + band_id = match1(url,r'#/(\d+)') + #try to get a considerable large l to reduce html parsing task. + api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id + content_json = json.loads(get_content(api_url)) + for sound in content_json: + title = sound["name"] + res_url = sound["url"] + songtype, ext, size = url_info(res_url,faker=True) + print_info(site_info, title, songtype, size) + if not info_only: + #no referer no speed! + download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) + pass + +def lizhi_download(url, output_dir = '.', merge = True, info_only = False): + # url like http://www.lizhi.fm/#/549759/18864883431656710 + api_id = match1(url,r'#/(\d+/\d+)') + api_url = 'http://www.lizhi.fm/api/audio/'+api_id + content_json = json.loads(get_content(api_url)) + title = content_json["audio"]["name"] + res_url = content_json["audio"]["url"] + songtype, ext, size = url_info(res_url,faker=True) + print_info(site_info, title, songtype, size) + if not info_only: + #no referer no speed! + download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) + + +site_info = "lizhi.fm" +download = lizhi_download +download_playlist = lizhi_download_playlist diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index a0a3824d..17b97bd6 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -12,6 +12,8 @@ import os def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False): rid = match1(url, r'id=(.*)') + if rid is None: + rid = match1(url, r'/(\d+)/?$') if "album" in url: j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) @@ -48,7 +50,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals def netease_song_download(song, output_dir='.', info_only=False): title = "%s. %s" % (song['position'], song['name']) - if 'hMusic' in song: + if 'hMusic' in song and song['hMusic'] != None: url_best = make_url(song['hMusic']['dfsId']) elif 'mp3Url' in song: url_best = song['mp3Url'] diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 6ee472e0..68e9b8ad 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -5,13 +5,21 @@ __all__ = ['sohu_download'] from ..common import * import json +import time +from random import random +from urllib.parse import urlparse -def real_url(host, prot, file, new): - url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new) - start, _, host, key = get_html(url).split('|')[:4] - return '%s%s?key=%s' % (start[:-1], new, key) +''' +Changelog: + 1. http://tv.sohu.com/upload/swf/20150604/Main.swf + new api +''' -def sohu_download(url, output_dir = '.', merge = True, info_only = False): +def real_url(host,vid,tvid,new,clipURL,ck): + url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random()) + return json.loads(get_html(url))['url'] + +def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None): if re.match(r'http://share.vrs.sohu.com', url): vid = r1('id=(\d+)', url) else: @@ -20,35 +28,42 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): assert vid if re.match(r'http://tv.sohu.com/', url): - data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) + if extractor_proxy: + set_proxy(tuple(extractor_proxy.split(":"))) + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: - hqvid = data['data'][qtyp] + hqvid = info['data'][qtyp] if hqvid != 0 and hqvid != vid : - data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) + info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) break - host = data['allot'] - prot = data['prot'] + if extractor_proxy: + unset_proxy() + host = info['allot'] + prot = info['prot'] + tvid = info['tvid'] urls = [] - data = data['data'] + data = info['data'] title = data['tvName'] size = sum(data['clipsBytes']) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for file, new in zip(data['clipsURL'], data['su']): - urls.append(real_url(host, prot, file, new)) - assert data['clipsURL'][0].endswith('.mp4') + for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): + clipURL = urlparse(clip).path + urls.append(real_url(host,hqvid,tvid,new,clipURL,ck)) + # assert data['clipsURL'][0].endswith('.mp4') else: - data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) - host = data['allot'] - prot = data['prot'] + info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) + host = info['allot'] + prot = info['prot'] + tvid = info['tvid'] urls = [] - data = data['data'] + data = info['data'] title = data['tvName'] - size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']]) + size = sum(map(int,data['clipsBytes'])) assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) - for file, new in zip(data['clipsURL'], data['su']): - urls.append(real_url(host, prot, file, new)) - assert data['clipsURL'][0].endswith('.mp4') + for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']): + clipURL = urlparse(clip).path + urls.append(real_url(host,vid,tvid,new,clipURL,ck)) print_info(site_info, title, 'mp4', size) if not info_only: diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index a9f78a6d..f2cf3c82 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -26,10 +26,10 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = html = get_html('http://www.tudou.com/programs/view/%s/' % id) iid = r1(r'iid\s*[:=]\s*(\S+)', html) - title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html) + title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) -def tudou_download(url, output_dir = '.', merge = True, info_only = False): +def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): # Embedded player id = r1(r'http://www.tudou.com/v/([^/]+)/', url) if id: @@ -37,14 +37,17 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False): html = get_decoded_html(url) - title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html) + title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") assert title title = unescape_html(title) vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html) if vcode: from .youku import youku_download_by_vid - return youku_download_by_vid(vcode, title=title, output_dir = output_dir, merge = merge, info_only = info_only) + if 'stream_id' in kwargs: + return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, stream_id=kwargs['stream_id']) + else: + return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only) iid = r1(r'iid\s*[:=]\s*(\d+)', html) if not iid: diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py new file mode 100644 index 00000000..4c0546ae --- /dev/null +++ b/src/you_get/extractors/twitter.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__all__ = ['twitter_download'] + +from ..common import * + +def twitter_download(url, output_dir='.', merge=True, info_only=False): + html = get_html(url) + screen_name = r1(r'data-screen-name="([^"]*)"', html) + item_id = r1(r'data-item-id="([^"]*)"', html) + title = "{} [{}]".format(screen_name, item_id) + icards = r1(r'data-src="([^"]*)"', html) + if icards: + html = get_html("https://twitter.com" + icards) + data = json.loads(unescape_html(r1(r'data-player-config="([^"]*)"', html))) + source = data['playlist'][0]['source'] + else: + source = r1(r'', html) + for name, value in r: + params[name] = value + data = parse.urlencode(params).encode('utf-8') + req = request.Request(url) + print("Please wait for 6 seconds...") + time.sleep(6) + print("Starting") + new_html = request.urlopen(req, data).read().decode('utf-8', 'replace') + new_stff = re.search('lnk_download" href="(.*?)">', new_html) + if(new_stff): + url = new_stff.group(1) + title = params['fname'] + type = "" + ext = "" + a, b, size = url_info(url) + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge) + else: + print("cannot find link, please review") + pdb.set_trace() + + +site_info = "vidto.me" +download = vidto_download +download_playlist = playlist_not_supported('vidto') diff --git a/src/you_get/extractors/vine.py b/src/you_get/extractors/vine.py index 6f2d50aa..11ac09b8 100644 --- a/src/you_get/extractors/vine.py +++ b/src/you_get/extractors/vine.py @@ -7,18 +7,16 @@ from ..common import * def vine_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) - vid = r1(r'vine.co/v/([^/]+)/', html) + vid = r1(r'vine.co/v/([^/]+)', url) title1 = r1(r'', html) + mime, ext, size = url_info(stream) - print_info(site_info, title, type, size) + print_info(site_info, title, mime, size) if not info_only: - download_urls([url], title, ext, size, output_dir, merge = merge) + download_urls([stream], title, ext, size, output_dir, merge=merge) site_info = "Vine.co" download = vine_download diff --git a/src/you_get/extractors/xiami.py b/src/you_get/extractors/xiami.py index 143e6eb5..4e0baec0 100644 --- a/src/you_get/extractors/xiami.py +++ b/src/you_get/extractors/xiami.py @@ -61,7 +61,7 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False): print_info(site_info, song_title, ext, size) if not info_only: - file_name = "%s - %s - %s" % (song_title, album_name, artist) + file_name = "%s - %s - %s" % (song_title, artist, album_name) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) try: xiami_download_lyric(lrc_url, file_name, output_dir) @@ -78,10 +78,16 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = tracks = doc.getElementsByTagName("track") track_nr = 1 for i in tracks: - artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue - album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue - song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) + artist=album_name=song_title=url="" + try: + song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue + artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue + album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue + song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue + url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) + except: + log.e("Song %s failed. [Info Missing] artist:%s, album:%s, title:%s, url:%s" % (song_id, artist, album_name, song_title, url)) + continue try: lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue except: @@ -142,8 +148,8 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info id = r1(r'http://www.xiami.com/album/(\d+)', url) xiami_download_album(id, output_dir, merge, info_only) - if re.match(r'http://www.xiami.com/song/showcollect/id/\d+', url): - id = r1(r'http://www.xiami.com/song/showcollect/id/(\d+)', url) + if re.match(r'http://www.xiami.com/collect/\d+', url): + id = r1(r'http://www.xiami.com/collect/(\d+)', url) xiami_download_showcollect(id, output_dir, merge, info_only) if re.match('http://www.xiami.com/song/\d+', url): diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 24439bc2..94378daa 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -100,7 +100,9 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): concat_list = open(output + '.txt', 'w', encoding="utf-8") for file in files: if os.path.isfile(file): - concat_list.write("file '%s'\n" % file) + # for escaping rules, see: + # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping + concat_list.write("file '%s'\n" % file.replace("'", r"'\''")) concat_list.close() params = [FFMPEG, '-f', 'concat', '-y', '-i'] diff --git a/src/you_get/processor/join_mp4.py b/src/you_get/processor/join_mp4.py index 24ba77f6..8eca239c 100755 --- a/src/you_get/processor/join_mp4.py +++ b/src/you_get/processor/join_mp4.py @@ -24,6 +24,9 @@ def read_uint(stream): def write_uint(stream, n): stream.write(struct.pack('>I', n)) +def write_ulong(stream, n): + stream.write(struct.pack('>Q', n)) + def read_ushort(stream): return struct.unpack('>H', stream.read(2))[0] @@ -99,11 +102,16 @@ class VariableAtom(Atom): self.write1(stream) i = 0 n = 0 - for name, offset, value in self.variables: + for name, offset, value, bsize in self.variables: stream.write(self.body[i:offset]) - write_uint(stream, value) - n += offset - i + 4 - i = offset + 4 + if bsize == 4: + write_uint(stream, value) + elif bsize == 8: + write_ulong(stream, value) + else: + raise NotImplementedError() + n += offset - i + bsize + i = offset + bsize stream.write(self.body[i:]) n += len(self.body) - i assert n == len(self.body) @@ -117,7 +125,7 @@ class VariableAtom(Atom): for i in range(len(self.variables)): variable = self.variables[i] if variable[0] == k: - self.variables[i] = (k, variable[1], v) + self.variables[i] = (k, variable[1], v, variable[3]) break else: raise Exception('field not found: '+k) @@ -127,6 +135,16 @@ def read_raw(stream, size, left, type): body = stream.read(left) return Atom(type, size, body) +def read_udta(stream, size, left, type): + assert size == left + 8 + body = stream.read(left) + class Udta(Atom): + def write(self, stream): + return + def calsize(self): + return 0 + return Udta(type, size, body) + def read_body_stream(stream, left): body = stream.read(left) assert len(body) == left @@ -139,6 +157,12 @@ def read_full_atom(stream): assert version == 0 return value +def read_full_atom2(stream): + value = read_uint(stream) + version = value >> 24 + flags = value & 0xffffff + return version, value + def read_mvhd(stream, size, left, type): body, stream = read_body_stream(stream, left) value = read_full_atom(stream) @@ -172,7 +196,7 @@ def read_mvhd(stream, size, left, type): nextTrackID = read_uint(stream) left -= 80 assert left == 0 - return VariableAtom(b'mvhd', size, body, [('duration', 16, duration)]) + return VariableAtom(b'mvhd', size, body, [('duration', 16, duration, 4)]) def read_tkhd(stream, size, left, type): body, stream = read_body_stream(stream, left) @@ -207,26 +231,35 @@ def read_tkhd(stream, size, left, type): height = qt_track_height >> 16 left -= 60 assert left == 0 - return VariableAtom(b'tkhd', size, body, [('duration', 20, duration)]) + return VariableAtom(b'tkhd', size, body, [('duration', 20, duration, 4)]) def read_mdhd(stream, size, left, type): body, stream = read_body_stream(stream, left) - value = read_full_atom(stream) + ver, value = read_full_atom2(stream) left -= 4 - - # new Date(movieTime * 1000 - 2082850791998L); - creation_time = read_uint(stream) - modification_time = read_uint(stream) - time_scale = read_uint(stream) - duration = read_uint(stream) - left -= 16 + + if ver == 1: + creation_time = read_ulong(stream) + modification_time = read_ulong(stream) + time_scale = read_uint(stream) + duration = read_ulong(stream) + var = [('duration', 24, duration, 8)] + left -= 28 + else: + assert ver == 0, "ver=%d" % ver + creation_time = read_uint(stream) + modification_time = read_uint(stream) + time_scale = read_uint(stream) + duration = read_uint(stream) + var = [('duration', 16, duration, 4)] + left -= 16 packed_language = read_ushort(stream) qt_quality = read_ushort(stream) left -= 4 assert left == 0 - return VariableAtom(b'mdhd', size, body, [('duration', 16, duration)]) + return VariableAtom(b'mdhd', size, body, var) def read_hdlr(stream, size, left, type): body, stream = read_body_stream(stream, left) @@ -240,8 +273,8 @@ def read_hdlr(stream, size, left, type): qt_component_flags_mask = read_uint(stream) left -= 20 - track_name = stream.read(left - 1) - assert stream.read(1) == b'\x00' + track_name = stream.read(left) + #assert track_name[-1] == b'\x00' return Atom(b'hdlr', size, body) @@ -324,16 +357,16 @@ def read_stts(stream, size, left, type): left -= 4 entry_count = read_uint(stream) - assert entry_count == 1 + #assert entry_count == 1 left -= 4 samples = [] for i in range(entry_count): - sample_count = read_uint(stream) - sample_duration = read_uint(stream) - samples.append((sample_count, sample_duration)) - left -= 8 - + sample_count = read_uint(stream) + sample_duration = read_uint(stream) + samples.append((sample_count, sample_duration)) + left -= 8 + assert left == 0 #return Atom('stts', size, None) class stts_atom(Atom): @@ -347,9 +380,9 @@ def read_stts(stream, size, left, type): write_uint(stream, sample_count) write_uint(stream, sample_duration) def calsize(self): - oldsize = self.size # TODO: remove + #oldsize = self.size # TODO: remove self.size = 8 + 4 + 4 + len(self.body[1]) * 8 - assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove + #assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove return self.size return stts_atom(b'stts', size, (value, samples)) @@ -623,8 +656,9 @@ atom_readers = { b'free': read_raw, b'edts': read_raw, b'pasp': read_raw, - + b'mdat': read_mdat, + b'udta': read_udta, } #stsd sample descriptions (codec types, initialization etc.) #stts (decoding) time-to-sample @@ -679,6 +713,7 @@ def parse_atoms(stream): return atoms def read_mp4(stream): + print(stream.name) atoms = parse_atoms(stream) moov = list(filter(lambda x: x.type == b'moov', atoms)) mdat = list(filter(lambda x: x.type == b'mdat', atoms)) @@ -695,11 +730,14 @@ def read_mp4(stream): def merge_stts(samples_list): sample_list = [] for samples in samples_list: - assert len(samples) == 1 - sample_list.append(samples[0]) + #assert len(samples) == 1 + #sample_list.append(samples[0]) + sample_list += samples counts, durations = zip(*sample_list) - assert len(set(durations)) == 1, 'not all durations equal' - return [(sum(counts), durations[0])] + #assert len(set(durations)) == 1, 'not all durations equal' + if len(set(durations)) == 1: + return [(sum(counts), durations[0])] + return sample_list def merge_stss(samples, sample_number_list): results = [] diff --git a/src/you_get/version.py b/src/you_get/version.py index 93aba3e8..f3e3d6a2 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.3.32' +__version__ = '0.3.33'