commit 146bae2f97f5d19a1d683a5087c2832cf832651a
Author: Mort Yao
Date:   Mon Aug 20 23:54:03 2012 +0800

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..77c8ae46
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+_*
+*.py[cod]
+
+*.flv
+*.mp4
+*.webm
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..711df074
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,23 @@
+==============================================
+This is a copy of the MIT license.
+==============================================
+Copyright (C) 2012 Mort Yao
+Copyright (C) 2012 Boyu Guo
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..45ea187d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,98 @@
+# You-Get
+
+A YouTube/Youku video download script in Python 3.
+
+### Python version
+
+Python 3.x
+
+### About
+
+Rewritten in Python 3 from the Youku download script [iambus/youku-lixian](https://github.com/iambus/youku-lixian), with the following additions:
+
+* YouTube support
+* Resumable downloads
+* Configurable HTTP proxy
+
+### Supported sites (more on the way)
+
+So far, only a handful of video sites are supported, based on my own needs; more will be added over time (・∀・)
+
+* YouTube
+* YinYueTai
+* Youku
+* Tudou
+
+### Output video formats
+
+* WebM (*.webm)
+* MP4 (*.mp4)
+* FLV (*.flv)
+* 3GP (*.3gp)
+
+For YouTube, the program downloads the highest-quality [format](http://en.wikipedia.org/wiki/Youtube#Quality_and_codecs) available.
+
+### Downloading videos
+
+(The commands below assume a Linux shell; Windows users, please adapt the command format accordingly.)
+
+Show video information without downloading (`-i` or `--info`):
+
+    $ ./you-get -i http://www.yinyuetai.com/video/463772
+
+Download a video:
+
+    $ ./you-get http://www.yinyuetai.com/video/463772
+
+Download multiple videos:
+
+    $ ./you-get http://www.yinyuetai.com/video/463772 http://www.yinyuetai.com/video/471500
+
+If a file named after the video title already exists in the current directory, the download is skipped. If a matching `.download` temporary file exists, the program resumes from where the last run stopped.
+To force a fresh download of the video, use the `-f` (`--force`) option:
+
+    $ ./you-get -f http://www.yinyuetai.com/video/463772
+
+The `-l` (`--playlist`) option downloads a whole playlist (only supported on some sites):
+
+    $ ./you-get -l http://www.youku.com/playlist_show/id_5344313.html
+
+Set the download directory for video files:
+
+    $ ./you-get -o ~/Downloads http://www.yinyuetai.com/video/463772
+
+Show the full help:
+
+    $ ./you-get -h
+
+### Setting a proxy
+
+By default, Python follows the system proxy configuration; the system HTTP proxy can be set through the `http_proxy` environment variable.
+
+The `-x` (`--http-proxy`) option tells You-Get to use a specific HTTP proxy. For example, if GoAgent's proxy server is `http://127.0.0.1:8087`, a YouTube video can be downloaded through that proxy with:
+
+    $ ./you-get -x 127.0.0.1:8087 http://www.youtube.com/watch?v=KbtO_Ayjw0M
+
+Circumvention tools on Windows such as Freegate set a system-wide proxy automatically, so YouTube videos can be downloaded without specifying an HTTP proxy:
+
+    $ ./you-get http://www.youtube.com/watch?v=KbtO_Ayjw0M
+
+If the program should not use any proxy at all while downloading (including the system proxy configuration), pass the `--no-proxy` option explicitly:
+
+    $ ./you-get --no-proxy http://v.youku.com/v_show/id_XMjI0ODc1NTc2.html
+
+### Resuming downloads
+
+If a download stops before it completes (because of a network failure, the process being killed, etc.), a temporary file with the `.download` extension is left in the target path.
+On the next run, whenever a matching `.download` temporary file is found in the target path, the program automatically resumes from where it stopped (unless the `-f` option is given).
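+
+For example, a hypothetical session (using one of the URLs from the examples above):
+
+    $ ./you-get http://www.yinyuetai.com/video/463772
+    ^C                                   # interrupted; a *.download temporary file is left behind
+    $ ./you-get http://www.yinyuetai.com/video/463772
+                                         # the second run picks up from the temporary file automatically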
+
+### Using Python 2?
+
+To download from Youku and other Chinese video sites, see: [iambus/youku-lixian](https://github.com/iambus/youku-lixian)
+
+To download from YouTube and other international video sites, see: [rg3/youtube-dl](https://github.com/rg3/youtube-dl)
+
+### License
+
+The source code is released under the MIT License.
diff --git a/common.py b/common.py
new file mode 100644
index 00000000..6c8a5c8a
--- /dev/null
+++ b/common.py
@@ -0,0 +1,430 @@
+#!/usr/bin/env python3
+
+import getopt
+import json
+import locale
+import os
+import re
+import sys
+from urllib import request, parse
+
+try:
+    proj_info = json.loads(open('you-get.json').read())
+except:
+    proj_info = {'version': ''}
+
+force = False
+
+if sys.stdout.isatty():
+    default_encoding = sys.stdout.encoding.lower()
+else:
+    default_encoding = locale.getpreferredencoding().lower()
+
+def tr(s):
+    if default_encoding.startswith('utf') or default_encoding in ['cp936', '936', 'ms936', 'gbk']:
+        return s
+    else:
+        return s.encode('utf-8')
+
+def r1(pattern, text):
+    m = re.search(pattern, text)
+    if m:
+        return m.group(1)
+
+def r1_of(patterns, text):
+    for p in patterns:
+        x = r1(p, text)
+        if x:
+            return x
+
+def escape_file_path(path):
+    path = path.replace('/', '-')
+    path = path.replace('\\', '-')
+    path = path.replace('*', '-')
+    path = path.replace('?', '-')
+    return path
+
+def unescape_html(html):
+    from html import parser
+    html = parser.HTMLParser().unescape(html)
+    html = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), html)
+    return html
+
+def ungzip(s):
+    from io import BytesIO
+    import gzip
+    buffer = BytesIO(s)
+    f = gzip.GzipFile(fileobj = buffer)
+    return f.read()
+
+def undeflate(s):
+    import zlib
+    return zlib.decompress(s, -zlib.MAX_WBITS)
+
+def get_response(url):
+    response = request.urlopen(url)
+    data = response.read()
+    if response.info().get('Content-Encoding') == 'gzip':
+        data = ungzip(data)
+    elif response.info().get('Content-Encoding') == 'deflate':
+        data = undeflate(data)
+    response.data = data
+    return response
+
+def get_html(url, encoding = None):
+    content = get_response(url).data
+    return str(content, encoding or 'utf-8', 'ignore')
+
+def get_decoded_html(url):
+    response = get_response(url)
+    data = response.data
+    charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
+    if charset:
+        return data.decode(charset)
+    else:
+        return data
+
+def url_size(url):
+    size = int(request.urlopen(url).headers['content-length'])
+    return size
+
+def urls_size(urls):
+    return sum(map(url_size, urls))
+
+def url_info(url):
+    response = request.urlopen(request.Request(url))
+    headers = response.headers
+
+    type = headers['content-type']
+    mapping = {
+        'video/3gpp': '3gp',
+        'video/f4v': 'flv',
+        'video/mp4': 'mp4',
+        'video/webm': 'webm',
+        'video/x-flv': 'flv'
+    }
+    assert type in mapping, type
+    ext = mapping[type]
+
+    size = int(headers['content-length'])
+
+    return type, ext, size
+
+def url_save(url, filepath, bar, refer = None, is_part = False):
+    file_size = url_size(url)
+
+    if os.path.exists(filepath):
+        if not force and file_size == os.path.getsize(filepath):
+            if not is_part:
+                if bar:
+                    bar.done()
+                print('Skipping %s: file already exists' % tr(os.path.basename(filepath)))
+            else:
+                if bar:
+                    bar.update_received(file_size)
+            return
+        else:
+            if not is_part:
+                if bar:
+                    bar.done()
+                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
+    elif not os.path.exists(os.path.dirname(filepath)):
+        os.mkdir(os.path.dirname(filepath))
+
+    temp_filepath = filepath + '.download'
+    received = 0
+    if not force:
+        open_mode = 'ab'
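+        # Resume support: any bytes already sitting in the .download temp file
+        # are counted as received; the request below then asks the server for
+        # the remainder via an HTTP Range header instead of starting over.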
+        if os.path.exists(temp_filepath):
+            received += os.path.getsize(temp_filepath)
+            if bar:
+                bar.update_received(os.path.getsize(temp_filepath))
+    else:
+        open_mode = 'wb'
+
+    if received < file_size:
+        headers = {}
+        if received:
+            headers['Range'] = 'bytes=' + str(received) + '-'
+        if refer:
+            headers['Referer'] = refer
+
+        response = request.urlopen(request.Request(url, headers = headers), None)
+        assert file_size == received + int(response.headers['content-length'])
+
+        with open(temp_filepath, open_mode) as output:
+            while True:
+                buffer = response.read(1024 * 256)
+                if not buffer:
+                    if received == file_size: # Download finished
+                        break
+                    else: # Unexpected termination. Retry request
+                        headers['Range'] = 'bytes=' + str(received) + '-'
+                        response = request.urlopen(request.Request(url, headers = headers), None)
+                output.write(buffer)
+                received += len(buffer)
+                if bar:
+                    bar.update_received(len(buffer))
+
+    assert received == os.path.getsize(temp_filepath), '%s == %s' % (received, os.path.getsize(temp_filepath))
+
+    if os.access(filepath, os.W_OK):
+        os.remove(filepath) # on Windows rename could fail if destination filepath exists
+    os.rename(temp_filepath, filepath)
+
+class SimpleProgressBar:
+    def __init__(self, total_size, total_pieces = 1):
+        self.displayed = False
+        self.total_size = total_size
+        self.total_pieces = total_pieces
+        self.current_piece = 1
+        self.received = 0
+
+    def update(self):
+        self.displayed = True
+        bar_size = 40
+        percent = round(self.received * 100 / self.total_size, 1)
+        if percent > 100:
+            percent = 100
+        dots = bar_size * int(percent) // 100
+        plus = percent * bar_size / 100 - dots # fraction of the bar cell now filling
+        if plus > 0.8:
+            plus = '='
+        elif plus > 0.4:
+            plus = '>'
+        else:
+            plus = ''
+        bar = '=' * dots + plus
+        bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces)
+        sys.stdout.write('\r' + bar)
+        sys.stdout.flush()
+
+    def update_received(self, n):
+        self.received += n
+        self.update()
+
+    def update_piece(self, n):
+        self.current_piece = n
+
+    def done(self):
+        if self.displayed:
+            print()
+            self.displayed = False
+
+class PiecesProgressBar:
+    def __init__(self, total_size, total_pieces = 1):
+        self.displayed = False
+        self.total_size = total_size
+        self.total_pieces = total_pieces
+        self.current_piece = 1
+        self.received = 0
+
+    def update(self):
+        self.displayed = True
+        bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('?', '?'
* 40, self.current_piece, self.total_pieces) + sys.stdout.write('\r' + bar) + sys.stdout.flush() + + def update_received(self, n): + self.received += n + self.update() + + def update_piece(self, n): + self.current_piece = n + + def done(self): + if self.displayed: + print() + self.displayed = False + +class DummyProgressBar: + def __init__(self, *args): + pass + def update_received(self, n): + pass + def update_piece(self, n): + pass + def done(self): + pass + +def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True): + assert urls + assert ext in ('3gp', 'flv', 'mp4', 'webm') + if not total_size: + try: + total_size = urls_size(urls) + except: + import traceback + import sys + traceback.print_exc(file = sys.stdout) + pass + title = escape_file_path(title) + filename = '%s.%s' % (title, ext) + filepath = os.path.join(output_dir, filename) + if total_size: + if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9: + print('Skipping %s: file already exists' % tr(filepath)) + return + bar = SimpleProgressBar(total_size, len(urls)) + else: + bar = PiecesProgressBar(total_size, len(urls)) + + if len(urls) == 1: + url = urls[0] + print('Downloading %s ...' % tr(filename)) + url_save(url, filepath, bar, refer = refer) + bar.done() + else: + flvs = [] + print('Downloading %s.%s ...' % (tr(title), ext)) + for i, url in enumerate(urls): + filename = '%s[%02d].%s' % (title, i, ext) + filepath = os.path.join(output_dir, filename) + flvs.append(filepath) + #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls)) + bar.update_piece(i + 1) + url_save(url, filepath, bar, refer = refer, is_part = True) + bar.done() + if not merge: + return + if ext == 'flv': + from merge_flv import concat_flvs + concat_flvs(flvs, os.path.join(output_dir, title + '.flv')) + for flv in flvs: + os.remove(flv) + elif ext == 'mp4': + from merge_mp4 import concat_mp4s + concat_mp4s(flvs, os.path.join(output_dir, title + '.mp4')) + for flv in flvs: + os.remove(flv) + else: + print("Can't merge %s files" % ext) + +def playlist_not_supported(name): + def f(*args, **kwargs): + raise NotImplementedError('Playlist is not supported for ' + name) + return f + +def print_info(site_info, title, type, size): + if type in ['3gp']: + type = 'video/3gpp' + elif type in ['flv', 'f4v']: + type = 'video/x-flv' + elif type in ['mp4']: + type = 'video/mp4' + elif type in ['webm']: + type = 'video/webm' + + if type in ['video/3gpp']: + type_info = "3GPP multimedia file (%s)" % type + elif type in ['video/x-flv', 'video/f4v']: + type_info = "Flash video (%s)" % type + elif type in ['video/mp4', 'video/x-m4v']: + type_info = "MPEG-4 video (%s)" % type + elif type in ['video/webm']: + type_info = "WebM video (%s)" % type + #elif type in ['video/ogg']: + # type_info = "Ogg video (%s)" % type + #elif type in ['video/quicktime']: + # type_info = "QuickTime video (%s)" % type + #elif type in ['video/x-matroska']: + # type_info = "Matroska video (%s)" % type + #elif type in ['video/x-ms-wmv']: + # type_info = "Windows Media video (%s)" % type + #elif type in ['video/mpeg']: + # type_info = "MPEG video (%s)" % type + else: + type_info = "Unknown type (%s)" % type + + print("Video Site:", site_info) + print("Title: ", tr(title)) + print("Type: ", type_info) + print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)") + +def set_http_proxy(proxy): + if proxy == None: # Use system default setting + proxy_support = request.ProxyHandler() + elif proxy == '': # 
Don't use any proxy
+        proxy_support = request.ProxyHandler({})
+    else: # Use proxy
+        if not proxy.startswith('http://'):
+            proxy = 'http://' + proxy
+        proxy_support = request.ProxyHandler({'http': '%s' % proxy})
+    opener = request.build_opener(proxy_support)
+    request.install_opener(opener)
+
+def main(script_name, download, download_playlist = None):
+    version = 'You-Get %s, a video downloader.' % proj_info['version']
+    help = 'Usage: [python3] %s [OPTION]... [URL]...\n' % script_name
+    help += '''\nStartup options:
+        -V | --version                           Display the version and exit.
+        -h | --help                              Print this help and exit.
+        '''
+    help += '''\nDownload options (use with URLs):
+        -f | --force                             Force overwriting existing files.
+        -i | --info                              Display video information without downloading.
+        -l | --playlist                          Download playlists. (only available for some sites)
+        -n | --no-merge                          Don't merge video parts.
+        -o | --output-dir                        Set the output directory for downloaded videos.
+        -x | --http-proxy                        Use a specific HTTP proxy for downloading.
+             --no-proxy                          Don't use any proxy. (ignore $http_proxy)
+        '''
+
+    short_opts = 'Vhfino:x:'
+    opts = ['version', 'help', 'force', 'info', 'no-merge', 'no-proxy', 'output-dir=', 'http-proxy=']
+    if download_playlist:
+        short_opts = 'l' + short_opts
+        opts = ['playlist'] + opts
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], short_opts, opts)
+    except getopt.GetoptError as err:
+        print(err)
+        print(help)
+        sys.exit(2)
+
+    info_only = False
+    playlist = False
+    merge = True
+    output_dir = '.'
+    proxy = None
+    for o, a in opts:
+        if o in ('-V', '--version'):
+            print(version)
+            sys.exit()
+        elif o in ('-h', '--help'):
+            print(version)
+            print(help)
+            sys.exit()
+        elif o in ('-f', '--force'):
+            global force
+            force = True
+        elif o in ('-i', '--info'):
+            info_only = True
+        elif o in ('-l', '--playlist'):
+            playlist = True
+        elif o in ('-n', '--no-merge'):
+            merge = False
+        elif o in ('--no-proxy',):
+            proxy = ''
+        elif o in ('-o', '--output-dir'):
+            output_dir = a
+        elif o in ('-x', '--http-proxy'):
+            proxy = a
+        else:
+            print(help)
+            sys.exit(1)
+    if not args:
+        print(help)
+        sys.exit(1)
+
+    set_http_proxy(proxy)
+
+    for url in args:
+        if not url.startswith('http://'):
+            url = 'http://' + url
+
+        if playlist:
+            download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only)
+        else:
+            download(url, output_dir = output_dir, merge = merge, info_only = info_only)
diff --git a/get.py b/get.py
new file mode 100755
index 00000000..11f17da3
--- /dev/null
+++ b/get.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+from common import *
+import get_tudou
+import get_yinyuetai
+import get_youku
+import get_youtube
+
+def url_to_module(url):
+    site = r1(r'http://([^/]+)/', url)
+    assert site, 'invalid url: ' + url
+
+    if site.endswith('.com.cn'):
+        site = site[:-3]
+    domain = r1(r'(\.[^.]+\.[^.]+)$', site)
+    assert domain, 'unsupported url: ' + url
+
+    k = r1(r'([^.]+)', domain)
+    downloads = {
+        'youtube': get_youtube,
+        'youku': get_youku,
+        'yinyuetai': get_yinyuetai,
+        'tudou': get_tudou,
+        #TODO:
+        # 'acfun': get_acfun,
+        # 'bilibili': get_bilibili,
+        # 'kankanews': get_bilibili,
+        # 'iask': get_iask,
+        # 'sina': get_iask,
+        # 'ku6': get_ku6,
+        # 'pptv': get_pptv,
+        # 'iqiyi': get_iqiyi,
+        # 'sohu': get_sohu,
+        # '56': get_w56,
+        # 'cntv': get_cntv,
+    }
+    if k in downloads:
+        return downloads[k]
+    else:
+        raise NotImplementedError(url)
+
+def any_download(url, output_dir = '.', merge = True, info_only = False):
+    m = url_to_module(url)
+    m.download(url, output_dir = output_dir, 
merge = merge, info_only = info_only)
+
+def any_download_playlist(url, output_dir = '.', merge = True, info_only = False):
+    m = url_to_module(url)
+    m.download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only)
+
+if __name__ == '__main__':
+    main('get.py', any_download, any_download_playlist)
diff --git a/get_tudou.py b/get_tudou.py
new file mode 100755
index 00000000..1d142876
--- /dev/null
+++ b/get_tudou.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+__all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid']
+
+from common import *
+
+def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
+    xml = get_html('http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99')
+
+    from xml.dom.minidom import parseString
+    doc = parseString(xml)
+    title = title or doc.firstChild.getAttribute('tt') or doc.firstChild.getAttribute('title')
+    urls = [(int(n.getAttribute('brt')), n.firstChild.nodeValue.strip()) for n in doc.getElementsByTagName('f')]
+
+    url = max(urls, key = lambda x: x[0])[1]
+    assert 'f4v' in url
+
+    type, ext, size = url_info(url)
+
+    print_info(site_info, title, type, size)
+    if not info_only:
+        download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
+
+def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
+    html = get_html('http://www.tudou.com/programs/view/%s/' % id)
+    iid = r1(r'iid\s*=\s*(\S+)', html)
+    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+def tudou_download(url, output_dir = '.', merge = True, info_only = False):
+    html = get_decoded_html(url)
+    iid = r1(r'iid\s*[:=]\s*(\d+)', html)
+    assert iid
+    title = r1(r'kw\s*[:=]\s*"([^"]+)"', html)
+    assert title
+    title = unescape_html(title)
+    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+def parse_playlist(url):
+    aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
+    html = get_decoded_html(url)
+    if not aid:
+        aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)
+    if re.match(r'http://www.tudou.com/albumcover/', url):
+        atitle = r1(r"title\s*:\s*'([^']+)'", html)
+    elif re.match(r'http://www.tudou.com/playlist/p/', url):
+        atitle = r1(r'atitle\s*=\s*"([^"]+)"', html)
+    else:
+        raise NotImplementedError(url)
+    assert aid
+    assert atitle
+    import json
+    #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid
+    url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
+    return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]
+
+def tudou_download_playlist(url, create_dir = False, output_dir = '.', merge = True, info_only = False):
+    if create_dir:
+        raise NotImplementedError('please report a bug so I can implement this')
+    videos = parse_playlist(url)
+    for i, (title, id) in enumerate(videos):
+        print('Downloading %s of %s videos...' 
% (i + 1, len(videos))) + tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge) + +site_info = "Tudou.com" +download = tudou_download +download_playlist = tudou_download_playlist + +if __name__ == '__main__': + main('tudou', tudou_download, tudou_download_playlist) diff --git a/get_yinyuetai.py b/get_yinyuetai.py new file mode 100755 index 00000000..b5953bb9 --- /dev/null +++ b/get_yinyuetai.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +__all__ = ['yinyuetai_download', 'yinyuetai_download_by_id'] + +from common import * + +def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): + assert title + html = get_html('http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id) + #TODO: run a fully parse instead of text search + # url = r1(r'(http://flv.yinyuetai.com/uploads/videos/common/\w+\.flv\?t=[a-f0-9]{16})', html) + # url = r1(r'http://hc.yinyuetai.com/uploads/videos/common/[A-F0-9]{32}\.mp4\?v=\d{12}', html) + url = r1(r'(http://\w+\.yinyuetai\.com/uploads/videos/common/\w+\.(?:flv|mp4)\?(?:t=[a-f0-9]{16}|v=\d{12}))', html) + assert url + type, ext, size = url_info(url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge = merge) + +def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False): + id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url) + assert id + html = get_html(url, 'utf-8') + title = r1(r'', html) + assert title + title = parse.unquote(title) + title = escape_file_path(title) + yinyuetai_download_by_id(id, title, output_dir, merge = merge, info_only = info_only) + +site_info = "YinYueTai.com" +download = yinyuetai_download +download_playlist = playlist_not_supported('yinyuetai') + +if __name__ == '__main__': + main('get_yinyuetai.py', yinyuetai_download) diff --git a/get_youku.py b/get_youku.py new file mode 100755 index 00000000..bd77c393 --- /dev/null +++ b/get_youku.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +__all__ = ['youku_download', 'youku_download_playlist', 'youku_download_by_id'] + +from common import * + +import json +from random import randint +from time import time +import re +import sys + +def trim_title(title): + title = title.replace(' - 视频 - 优酷视频 - 在线观看', '') + title = title.replace(' - 专辑 - 优酷视频', '') + title = re.sub(r'—([^—]+)—优酷网,视频高清在线观看', '', title) + return title + +def find_video_id_from_url(url): + patterns = [r'^http://v.youku.com/v_show/id_([\w=]+).html', + r'^http://player.youku.com/player.php/sid/([\w=]+)/v.swf', + r'^loader\.swf\?VideoIDS=([\w=]+)', + r'^([\w=]+)$'] + return r1_of(patterns, url) + +def find_video_id_from_show_page(url): + return re.search(r'
.*href="([^"]+)"', get_html(url)).group(1) + +def youku_url(url): + id = find_video_id_from_url(url) + if id: + return 'http://v.youku.com/v_show/id_%s.html' % id + if re.match(r'http://www.youku.com/show_page/id_\w+.html', url): + return find_video_id_from_show_page(url) + if re.match(r'http://v.youku.com/v_playlist/\w+.html', url): + return url + raise Exception('Invalid Youku URL: '+url) + +def parse_page(url): + url = youku_url(url) + page = get_html(url) + id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1) + if re.search(r'v_playlist', url): + # if we are playing a video from playlist, the meta title might be incorrect + title = re.search(r'([^<>]*)', page).group(1) + else: + title = re.search(r'', page).group(1) + title = trim_title(title) + if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title): + title = re.sub(r'^[^-]+-\s*', '', title) # remove the special name from title for playlist video + title = unescape_html(title) + subtitle = re.search(r'([^<>]*)', page) + if subtitle: + subtitle = subtitle.group(1).strip() + if subtitle == title: + subtitle = None + return id2, title, subtitle + +def get_info(videoId2): + return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2)) + +def find_video(info, stream_type = None): + #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5) + segs = info['data'][0]['segs'] + types = segs.keys() + if not stream_type: + for x in ['hd2', 'mp4', 'flv']: + if x in types: + stream_type = x + break + else: + raise NotImplementedError() + assert stream_type in ('hd2', 'mp4', 'flv') + file_type = {'hd2': 'flv', 'mp4': 'mp4', 'flv': 'flv'}[stream_type] + + seed = info['data'][0]['seed'] + source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890") + mixed = '' + while source: + seed = (seed * 211 + 30031) & 0xFFFF + index = seed * len(source) >> 16 + c = source.pop(index) + mixed += c + + ids = info['data'][0]['streamfileids'][stream_type].split('*')[:-1] + vid = ''.join(mixed[int(i)] for i in ids) + + sid = '%s%s%s' % (int(time() * 1000), randint(1000, 1999), randint(1000, 9999)) + + urls = [] + for s in segs[stream_type]: + no = '%02x' % int(s['no']) + url = 'http://f.youku.com/player/getFlvPath/sid/%s_%s/st/%s/fileid/%s%s%s?K=%s&ts=%s' % (sid, no, file_type, vid[:8], no.upper(), vid[10:], s['k'], s['seconds']) + urls.append((url, int(s['size']))) + return urls + +def file_type_of_url(url): + return str(re.search(r'/st/([^/]+)/', url).group(1)) + +def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge = True, info_only = False): + info = get_info(id2) + urls, sizes = zip(*find_video(info, stream_type)) + total_size = sum(sizes) + + print_info(site_info, title, file_type_of_url(urls[0]), total_size) + if not info_only: + download_urls(urls, title, file_type_of_url(urls[0]), total_size, output_dir, merge = merge) + +def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): + id2, title, subtitle = parse_page(url) + if subtitle: + title += '-' + subtitle + + youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only) + +def parse_playlist_videos(html): + return re.findall(r'id="A_(\w+)"', html) + +def parse_playlist_pages(html): + m = re.search(r'
    .*?
', html, flags = re.S) + if m: + urls = re.findall(r'href="([^"]+)"', m.group()) + x1, x2, x3 = re.match(r'^(.*page_)(\d+)(_.*)$', urls[-1]).groups() + return ['http://v.youku.com%s%s%s?__rt=1&__ro=listShow' % (x1, i, x3) for i in range(2, int(x2) + 1)] + else: + return [] + +def parse_playlist(url): + html = get_html(url) + video_id = re.search(r"var\s+videoId\s*=\s*'(\d+)'", html).group(1) + show_id = re.search(r'var\s+showid\s*=\s*"(\d+)"', html).group(1) + list_url = 'http://v.youku.com/v_vpofficiallist/page_1_showid_%s_id_%s.html?__rt=1&__ro=listShow' % (show_id, video_id) + html = get_html(list_url) + ids = parse_playlist_videos(html) + for url in parse_playlist_pages(html): + ids.extend(parse_playlist_videos(get_html(url))) + return ids + +def parse_vplaylist(url): + id = r1_of([r'^http://www.youku.com/playlist_show/id_(\d+)(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', + r'^http://v.youku.com/v_playlist/f(\d+)o[01]p\d+.html', + r'^http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html'], + url) + assert id, 'not valid vplaylist url: ' + url + url = 'http://www.youku.com/playlist_show/id_%s.html' % id + n = int(re.search(r'(\d+)', get_html(url)).group(1)) + return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)] + +def youku_download_playlist(url, output_dir = '.', merge = True, info_only = False): + if re.match(r'http://www.youku.com/show_page/id_\w+.html', url): + url = find_video_id_from_show_page(url) + + if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url): + ids = parse_vplaylist(url) + elif re.match(r'http://v.youku.com/v_playlist/f\d+o[01]p\d+.html', url): + ids = parse_vplaylist(url) + elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url): + ids = parse_vplaylist(url) + else: + assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist' + ids = parse_playlist(url) + + for i, id in enumerate(ids): + print('Processing %s of %s videos...' 
% (i + 1, len(ids))) + youku_download(id, output_dir, merge = merge, info_only = info_only) + +site_info = "Youku.com" +download = youku_download +download_playlist = youku_download_playlist + +if __name__ == '__main__': + main('get_youku.py', youku_download, youku_download_playlist) diff --git a/get_youtube.py b/get_youtube.py new file mode 100755 index 00000000..747c6fac --- /dev/null +++ b/get_youtube.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +__all__ = ['youtube_download', 'youtube_download_by_id'] + +from common import * + +def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): + try: + url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/get_video_info?&video_id=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:] + except: + url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:] + type, ext, size = url_info(url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge = merge) + +def youtube_download(url, output_dir = '.', merge = True, info_only = False): + id = parse.parse_qs(parse.urlparse(url).query)['v'][0] + assert id + try: + title = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/get_video_info?&video_id=' + id).read().decode('utf-8')))['title'][0] + except: + html = get_html(url, 'utf-8') + title = r1(r'"title": "([^"]+)"', html) + assert title + title = parse.unquote(title) + title = escape_file_path(title) + youtube_download_by_id(id, title, output_dir, merge = merge, info_only = info_only) + +site_info = "YouTube.com" +download = youtube_download +download_playlist = playlist_not_supported('youtube') + +if __name__ == '__main__': + main('get_youtube.py', youtube_download) diff --git a/merge_flv.py b/merge_flv.py new file mode 100755 index 00000000..f2a0dcbe --- /dev/null +++ b/merge_flv.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python3 + +import struct +from io import BytesIO + +TAG_TYPE_METADATA = 18 + +################################################## +# AMF0 +################################################## + +AMF_TYPE_NUMBER = 0x00 +AMF_TYPE_BOOLEAN = 0x01 +AMF_TYPE_STRING = 0x02 +AMF_TYPE_OBJECT = 0x03 +AMF_TYPE_MOVIECLIP = 0x04 +AMF_TYPE_NULL = 0x05 +AMF_TYPE_UNDEFINED = 0x06 +AMF_TYPE_REFERENCE = 0x07 +AMF_TYPE_MIXED_ARRAY = 0x08 +AMF_TYPE_END_OF_OBJECT = 0x09 +AMF_TYPE_ARRAY = 0x0A +AMF_TYPE_DATE = 0x0B +AMF_TYPE_LONG_STRING = 0x0C +AMF_TYPE_UNSUPPORTED = 0x0D +AMF_TYPE_RECORDSET = 0x0E +AMF_TYPE_XML = 0x0F +AMF_TYPE_CLASS_OBJECT = 0x10 +AMF_TYPE_AMF3_OBJECT = 0x11 + +class ECMAObject: + def __init__(self, max_number): + self.max_number = max_number + self.data = [] + self.map = {} + def put(self, k, v): + self.data.append((k, v)) + self.map[k] = v + def get(self, k): + return self.map[k] + def set(self, k, v): + for i in range(len(self.data)): + if self.data[i][0] == k: + self.data[i] = (k, v) + break + else: + raise KeyError(k) + self.map[k] = v + def keys(self): + return self.map.keys() + def __str__(self): + return 'ECMAObject<' + repr(self.map) + '>' + def __eq__(self, other): + return self.max_number == other.max_number and self.data == other.data + +def read_amf_number(stream): + return struct.unpack('>d', stream.read(8))[0] + +def read_amf_boolean(stream): + b = read_byte(stream) + assert b in (0, 1) + return bool(b) + +def read_amf_string(stream): + xx = stream.read(2) + if xx == b'': + # 
dirty fix for the invalid Qiyi flv + return None + n = struct.unpack('>H', xx)[0] + s = stream.read(n) + assert len(s) == n + return s.decode('utf-8') + +def read_amf_object(stream): + obj = {} + while True: + k = read_amf_string(stream) + if not k: + assert read_byte(stream) == AMF_TYPE_END_OF_OBJECT + break + v = read_amf(stream) + obj[k] = v + return obj + +def read_amf_mixed_array(stream): + max_number = read_uint(stream) + mixed_results = ECMAObject(max_number) + while True: + k = read_amf_string(stream) + if k is None: + # dirty fix for the invalid Qiyi flv + break + if not k: + assert read_byte(stream) == AMF_TYPE_END_OF_OBJECT + break + v = read_amf(stream) + mixed_results.put(k, v) + assert len(mixed_results.data) == max_number + return mixed_results + +def read_amf_array(stream): + n = read_uint(stream) + v = [] + for i in range(n): + v.append(read_amf(stream)) + return v + +amf_readers = { + AMF_TYPE_NUMBER: read_amf_number, + AMF_TYPE_BOOLEAN: read_amf_boolean, + AMF_TYPE_STRING: read_amf_string, + AMF_TYPE_OBJECT: read_amf_object, + AMF_TYPE_MIXED_ARRAY: read_amf_mixed_array, + AMF_TYPE_ARRAY: read_amf_array, +} + +def read_amf(stream): + return amf_readers[read_byte(stream)](stream) + +def write_amf_number(stream, v): + stream.write(struct.pack('>d', v)) + +def write_amf_boolean(stream, v): + if v: + stream.write(b'\x01') + else: + stream.write(b'\x00') + +def write_amf_string(stream, s): + s = s.encode('utf-8') + stream.write(struct.pack('>H', len(s))) + stream.write(s) + +def write_amf_object(stream, o): + for k in o: + write_amf_string(stream, k) + write_amf(stream, o[k]) + write_amf_string(stream, '') + write_byte(stream, AMF_TYPE_END_OF_OBJECT) + +def write_amf_mixed_array(stream, o): + write_uint(stream, o.max_number) + for k, v in o.data: + write_amf_string(stream, k) + write_amf(stream, v) + write_amf_string(stream, '') + write_byte(stream, AMF_TYPE_END_OF_OBJECT) + +def write_amf_array(stream, o): + write_uint(stream, len(o)) + for v in o: + write_amf(stream, v) + +amf_writers_tags = { + float: AMF_TYPE_NUMBER, + bool: AMF_TYPE_BOOLEAN, + str: AMF_TYPE_STRING, + dict: AMF_TYPE_OBJECT, + ECMAObject: AMF_TYPE_MIXED_ARRAY, + list: AMF_TYPE_ARRAY, +} + +amf_writers = { + AMF_TYPE_NUMBER: write_amf_number, + AMF_TYPE_BOOLEAN: write_amf_boolean, + AMF_TYPE_STRING: write_amf_string, + AMF_TYPE_OBJECT: write_amf_object, + AMF_TYPE_MIXED_ARRAY: write_amf_mixed_array, + AMF_TYPE_ARRAY: write_amf_array, +} + +def write_amf(stream, v): + if isinstance(v, ECMAObject): + tag = amf_writers_tags[ECMAObject] + else: + tag = amf_writers_tags[type(v)] + write_byte(stream, tag) + amf_writers[tag](stream, v) + +################################################## +# FLV +################################################## + +def read_int(stream): + return struct.unpack('>i', stream.read(4))[0] + +def read_uint(stream): + return struct.unpack('>I', stream.read(4))[0] + +def write_uint(stream, n): + stream.write(struct.pack('>I', n)) + +def read_byte(stream): + return ord(stream.read(1)) + +def write_byte(stream, b): + stream.write(bytes([b])) + +def read_unsigned_medium_int(stream): + x1, x2, x3 = struct.unpack('BBB', stream.read(3)) + return (x1 << 16) | (x2 << 8) | x3 + +def read_tag(stream): + # header size: 15 bytes + header = stream.read(15) + if len(header) == 4: + return + x = struct.unpack('>IBBBBBBBBBBB', header) + previous_tag_size = x[0] + data_type = x[1] + body_size = (x[2] << 16) | (x[3] << 8) | x[4] + assert body_size < 1024 * 1024 * 128, 'tag body size too big (> 128MB)' + 
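+    # Layout of the 15 header bytes unpacked above (per the FLV file format):
+    # uint32 previous-tag-size, uint8 tag type, uint24 body size, uint24
+    # timestamp plus one extended-timestamp byte (bits 24..31), and a 3-byte
+    # stream ID that must be zero (asserted below).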
timestamp = (x[5] << 16) | (x[6] << 8) | x[7]
+    timestamp += x[8] << 24
+    assert x[9:] == (0, 0, 0)
+    body = stream.read(body_size)
+    return (data_type, timestamp, body_size, body, previous_tag_size)
+    #previous_tag_size = read_uint(stream)
+    #data_type = read_byte(stream)
+    #body_size = read_unsigned_medium_int(stream)
+    #assert body_size < 1024*1024*128, 'tag body size too big (> 128MB)'
+    #timestamp = read_unsigned_medium_int(stream)
+    #timestamp += read_byte(stream) << 24
+    #assert read_unsigned_medium_int(stream) == 0
+    #body = stream.read(body_size)
+    #return (data_type, timestamp, body_size, body, previous_tag_size)
+
+def write_tag(stream, tag):
+    data_type, timestamp, body_size, body, previous_tag_size = tag
+    write_uint(stream, previous_tag_size)
+    write_byte(stream, data_type)
+    write_byte(stream, body_size>>16 & 0xff)
+    write_byte(stream, body_size>>8 & 0xff)
+    write_byte(stream, body_size & 0xff)
+    write_byte(stream, timestamp>>16 & 0xff)
+    write_byte(stream, timestamp>>8 & 0xff)
+    write_byte(stream, timestamp & 0xff)
+    write_byte(stream, timestamp>>24 & 0xff)
+    stream.write(b'\0\0\0')
+    stream.write(body)
+
+def read_flv_header(stream):
+    assert stream.read(3) == b'FLV'
+    header_version = read_byte(stream)
+    assert header_version == 1
+    type_flags = read_byte(stream)
+    assert type_flags == 5
+    data_offset = read_uint(stream)
+    assert data_offset == 9
+
+def write_flv_header(stream):
+    stream.write(b'FLV')
+    write_byte(stream, 1)
+    write_byte(stream, 5)
+    write_uint(stream, 9)
+
+def read_meta_data(stream):
+    meta_type = read_amf(stream)
+    meta = read_amf(stream)
+    return meta_type, meta
+
+def read_meta_tag(tag):
+    data_type, timestamp, body_size, body, previous_tag_size = tag
+    assert data_type == TAG_TYPE_METADATA
+    assert timestamp == 0
+    assert previous_tag_size == 0
+    return read_meta_data(BytesIO(body))
+
+#def write_meta_data(stream, meta_type, meta_data):
+#    assert isinstance(meta_type, basestring)
+#    write_amf(meta_type)
+#    write_amf(meta_data)
+
+def write_meta_tag(stream, meta_type, meta_data):
+    buffer = BytesIO()
+    write_amf(buffer, meta_type)
+    write_amf(buffer, meta_data)
+    body = buffer.getvalue()
+    write_tag(stream, (TAG_TYPE_METADATA, 0, len(body), body, 0))
+
+
+##################################################
+# main
+##################################################
+
+def guess_output(inputs):
+    import os.path
+    inputs = list(map(os.path.basename, inputs))
+    n = min(map(len, inputs))
+    for i in reversed(range(1, n)):
+        if len(set(s[:i] for s in inputs)) == 1:
+            return inputs[0][:i] + '.flv'
+    return 'output.flv'
+
+def concat_flvs(flvs, output = None):
+    assert flvs, 'no flv file found'
+    import os.path
+    if not output:
+        output = guess_output(flvs)
+    elif os.path.isdir(output):
+        output = os.path.join(output, guess_output(flvs))
+
+    print('Merging video parts...')
+    ins = [open(flv, 'rb') for flv in flvs]
+    for stream in ins:
+        read_flv_header(stream)
+    meta_tags = map(read_tag, ins)
+    metas = list(map(read_meta_tag, meta_tags))
+    meta_types, metas = zip(*metas)
+    assert len(set(meta_types)) == 1
+    meta_type = meta_types[0]
+
+    # must merge fields: duration
+    # TODO: check other meta info, update other meta info
+    total_duration = sum(meta.get('duration') for meta in metas)
+    meta_data = metas[0]
+    meta_data.set('duration', total_duration)
+
+    out = open(output, 'wb')
+    write_flv_header(out)
+    write_meta_tag(out, meta_type, meta_data)
+    timestamp_start = 0
+    for stream in ins:
+        while True:
+            tag = read_tag(stream)
+            if tag:
+                data_type, timestamp, 
body_size, body, previous_tag_size = tag + timestamp += timestamp_start + tag = data_type, timestamp, body_size, body, previous_tag_size + write_tag(out, tag) + else: + break + timestamp_start = timestamp + write_uint(out, previous_tag_size) + + return output + +def usage(): + print('Usage: [python3] merge_flv.py --output TARGET.flv flv...') + +def main(): + import sys, getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="]) + except getopt.GetoptError as err: + usage() + sys.exit(1) + output = None + for o, a in opts: + if o in ("-h", "--help"): + usage() + sys.exit() + elif o in ("-o", "--output"): + output = a + else: + usage() + sys.exit(1) + if not args: + usage() + sys.exit(1) + + concat_flvs(args, output) + +if __name__ == '__main__': + main() diff --git a/merge_mp4.py b/merge_mp4.py new file mode 100755 index 00000000..5b551c38 --- /dev/null +++ b/merge_mp4.py @@ -0,0 +1,907 @@ +#!/usr/bin/env python3 + +# reference: c041828_ISO_IEC_14496-12_2005(E).pdf + +################################################## +# reader and writer +################################################## + +import struct +from io import BytesIO + +def skip(stream, n): + stream.seek(stream.tell() + n) + +def skip_zeros(stream, n): + assert stream.read(n) == b'\x00' * n + +def read_int(stream): + return struct.unpack('>i', stream.read(4))[0] + +def read_uint(stream): + return struct.unpack('>I', stream.read(4))[0] + +def write_uint(stream, n): + stream.write(struct.pack('>I', n)) + +def read_ushort(stream): + return struct.unpack('>H', stream.read(2))[0] + +def read_ulong(stream): + return struct.unpack('>Q', stream.read(8))[0] + +def read_byte(stream): + return ord(stream.read(1)) + +def copy_stream(source, target, n): + buffer_size = 1024 * 1024 + while n > 0: + to_read = min(buffer_size, n) + s = source.read(to_read) + assert len(s) == to_read, 'no enough data' + target.write(s) + n -= to_read + +class Atom: + def __init__(self, type, size, body): + assert len(type) == 4 + self.type = type + self.size = size + self.body = body + def __str__(self): + #return '' % (self.type, repr(self.body)) + return '' % (self.type, '') + def __repr__(self): + return str(self) + def write1(self, stream): + write_uint(stream, self.size) + stream.write(self.type) + def write(self, stream): + assert type(self.body) == bytes, '%s: %s' % (self.type, type(self.body)) + assert self.size == 8 + len(self.body) + self.write1(stream) + stream.write(self.body) + def calsize(self): + return self.size + +class CompositeAtom(Atom): + def __init__(self, type, size, body): + assert isinstance(body, list) + Atom.__init__(self, type, size, body) + def write(self, stream): + assert type(self.body) == list + self.write1(stream) + for atom in self.body: + atom.write(stream) + def calsize(self): + self.size = 8 + sum([atom.calsize() for atom in self.body]) + return self.size + def get1(self, k): + for a in self.body: + if a.type == k: + return a + else: + raise Exception('atom not found: ' + k) + def get(self, *keys): + atom = self + for k in keys: + atom = atom.get1(k) + return atom + def get_all(self, k): + return list(filter(lambda x: x.type == k, self.body)) + +class VariableAtom(Atom): + def __init__(self, type, size, body, variables): + assert isinstance(body, bytes) + Atom.__init__(self, type, size, body) + self.variables = variables + def write(self, stream): + self.write1(stream) + i = 0 + n = 0 + for name, offset, value in self.variables: + stream.write(self.body[i:offset]) + write_uint(stream, value) + n += 
offset - i + 4 + i = offset + 4 + stream.write(self.body[i:]) + n += len(self.body) - i + assert n == len(self.body) + def get(self, k): + for v in self.variables: + if v[0] == k: + return v[2] + else: + raise Exception('field not found: ' + k) + def set(self, k, v): + for i in range(len(self.variables)): + variable = self.variables[i] + if variable[0] == k: + self.variables[i] = (k, variable[1], v) + break + else: + raise Exception('field not found: '+k) + +def read_raw(stream, size, left, type): + assert size == left + 8 + body = stream.read(left) + return Atom(type, size, body) + +def read_body_stream(stream, left): + body = stream.read(left) + assert len(body) == left + return body, BytesIO(body) + +def read_full_atom(stream): + value = read_uint(stream) + version = value >> 24 + flags = value & 0xffffff + assert version == 0 + return value + +def read_mvhd(stream, size, left, type): + body, stream = read_body_stream(stream, left) + value = read_full_atom(stream) + left -= 4 + + # new Date(movieTime * 1000 - 2082850791998L); + creation_time = read_uint(stream) + modification_time = read_uint(stream) + time_scale = read_uint(stream) + duration = read_uint(stream) + left -= 16 + + qt_preferred_fate = read_uint(stream) + qt_preferred_volume = read_ushort(stream) + assert stream.read(10) == b'\x00' * 10 + qt_matrixA = read_uint(stream) + qt_matrixB = read_uint(stream) + qt_matrixU = read_uint(stream) + qt_matrixC = read_uint(stream) + qt_matrixD = read_uint(stream) + qt_matrixV = read_uint(stream) + qt_matrixX = read_uint(stream) + qt_matrixY = read_uint(stream) + qt_matrixW = read_uint(stream) + qt_previewTime = read_uint(stream) + qt_previewDuration = read_uint(stream) + qt_posterTime = read_uint(stream) + qt_selectionTime = read_uint(stream) + qt_selectionDuration = read_uint(stream) + qt_currentTime = read_uint(stream) + nextTrackID = read_uint(stream) + left -= 80 + assert left == 0 + return VariableAtom(b'mvhd', size, body, [('duration', 16, duration)]) + +def read_tkhd(stream, size, left, type): + body, stream = read_body_stream(stream, left) + value = read_full_atom(stream) + left -= 4 + + # new Date(movieTime * 1000 - 2082850791998L); + creation_time = read_uint(stream) + modification_time = read_uint(stream) + track_id = read_uint(stream) + assert stream.read(4) == b'\x00' * 4 + duration = read_uint(stream) + left -= 20 + + assert stream.read(8) == b'\x00' * 8 + qt_layer = read_ushort(stream) + qt_alternate_group = read_ushort(stream) + qt_volume = read_ushort(stream) + assert stream.read(2) == b'\x00\x00' + qt_matrixA = read_uint(stream) + qt_matrixB = read_uint(stream) + qt_matrixU = read_uint(stream) + qt_matrixC = read_uint(stream) + qt_matrixD = read_uint(stream) + qt_matrixV = read_uint(stream) + qt_matrixX = read_uint(stream) + qt_matrixY = read_uint(stream) + qt_matrixW = read_uint(stream) + qt_track_width = read_uint(stream) + width = qt_track_width >> 16 + qt_track_height = read_uint(stream) + height = qt_track_height >> 16 + left -= 60 + assert left == 0 + return VariableAtom(b'tkhd', size, body, [('duration', 20, duration)]) + +def read_mdhd(stream, size, left, type): + body, stream = read_body_stream(stream, left) + value = read_full_atom(stream) + left -= 4 + + # new Date(movieTime * 1000 - 2082850791998L); + creation_time = read_uint(stream) + modification_time = read_uint(stream) + time_scale = read_uint(stream) + duration = read_uint(stream) + left -= 16 + + packed_language = read_ushort(stream) + qt_quality = read_ushort(stream) + left -= 4 + + assert left == 0 
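+    # 'duration' sits at byte offset 16 of the mdhd body: version/flags (4)
+    # + creation_time (4) + modification_time (4) + time_scale (4), which is
+    # what lets the VariableAtom below patch the merged duration in place.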
+ return VariableAtom(b'mdhd', size, body, [('duration', 16, duration)]) + +def read_hdlr(stream, size, left, type): + body, stream = read_body_stream(stream, left) + value = read_full_atom(stream) + left -= 4 + + qt_component_type = read_uint(stream) + handler_type = read_uint(stream) + qt_component_manufacturer = read_uint(stream) + qt_component_flags = read_uint(stream) + qt_component_flags_mask = read_uint(stream) + left -= 20 + + track_name = stream.read(left - 1) + assert stream.read(1) == b'\x00' + + return Atom(b'hdlr', size, body) + +def read_vmhd(stream, size, left, type): + body, stream = read_body_stream(stream, left) + value = read_full_atom(stream) + left -= 4 + + assert left == 8 + graphic_mode = read_ushort(stream) + op_color_read = read_ushort(stream) + op_color_green = read_ushort(stream) + op_color_blue = read_ushort(stream) + + return Atom(b'vmhd', size, body) + +def read_stsd(stream, size, left, type): + value = read_full_atom(stream) + left -= 4 + + entry_count = read_uint(stream) + left -= 4 + + children = [] + for i in range(entry_count): + atom = read_atom(stream) + children.append(atom) + left -= atom.size + + assert left == 0 + #return Atom('stsd', size, children) + class stsd_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + write_uint(stream, self.body[0]) + write_uint(stream, len(self.body[1])) + for atom in self.body[1]: + atom.write(stream) + def calsize(self): + oldsize = self.size # TODO: remove + self.size = 8 + 4 + 4 + sum([atom.calsize() for atom in self.body[1]]) + assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove + return self.size + return stsd_atom(b'stsd', size, (value, children)) + +def read_avc1(stream, size, left, type): + body, stream = read_body_stream(stream, left) + + skip_zeros(stream, 6) + data_reference_index = read_ushort(stream) + skip_zeros(stream, 2) + skip_zeros(stream, 2) + skip_zeros(stream, 12) + width = read_ushort(stream) + height = read_ushort(stream) + horizontal_rez = read_uint(stream) >> 16 + vertical_rez = read_uint(stream) >> 16 + assert stream.read(4) == b'\x00' * 4 + frame_count = read_ushort(stream) + string_len = read_byte(stream) + compressor_name = stream.read(31) + depth = read_ushort(stream) + assert stream.read(2) == b'\xff\xff' + left -= 78 + + child = read_atom(stream) + assert child.type in (b'avcC', b'pasp'), 'if the sub atom is not avcC or pasp (actual %s), you should not cache raw body' % child.type + left -= child.size + stream.read(left) # XXX + return Atom(b'avc1', size, body) + +def read_avcC(stream, size, left, type): + stream.read(left) + return Atom(b'avcC', size, None) + +def read_stts(stream, size, left, type): + value = read_full_atom(stream) + left -= 4 + + entry_count = read_uint(stream) + assert entry_count == 1 + left -= 4 + + samples = [] + for i in range(entry_count): + sample_count = read_uint(stream) + sample_duration = read_uint(stream) + samples.append((sample_count, sample_duration)) + left -= 8 + + assert left == 0 + #return Atom('stts', size, None) + class stts_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + write_uint(stream, self.body[0]) + write_uint(stream, len(self.body[1])) + for sample_count, sample_duration in self.body[1]: + write_uint(stream, sample_count) + write_uint(stream, sample_duration) + def calsize(self): + oldsize = self.size # TODO: remove 
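+            # 8-byte box header + 4 (version/flags) + 4 (entry count)
+            # + 8 bytes per (sample_count, sample_duration) entry: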
+ self.size = 8 + 4 + 4 + len(self.body[1]) * 8 + assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove + return self.size + return stts_atom(b'stts', size, (value, samples)) + +def read_stss(stream, size, left, type): + value = read_full_atom(stream) + left -= 4 + + entry_count = read_uint(stream) + left -= 4 + + samples = [] + for i in range(entry_count): + sample = read_uint(stream) + samples.append(sample) + left -= 4 + + assert left == 0 + #return Atom('stss', size, None) + class stss_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + write_uint(stream, self.body[0]) + write_uint(stream, len(self.body[1])) + for sample in self.body[1]: + write_uint(stream, sample) + def calsize(self): + self.size = 8 + 4 + 4 + len(self.body[1]) * 4 + return self.size + return stss_atom(b'stss', size, (value, samples)) + +def read_stsc(stream, size, left, type): + value = read_full_atom(stream) + left -= 4 + + entry_count = read_uint(stream) + left -= 4 + + chunks = [] + for i in range(entry_count): + first_chunk = read_uint(stream) + samples_per_chunk = read_uint(stream) + sample_description_index = read_uint(stream) + assert sample_description_index == 1 # what is it? + chunks.append((first_chunk, samples_per_chunk, sample_description_index)) + left -= 12 + #chunks, samples = zip(*chunks) + #total = 0 + #for c, s in zip(chunks[1:], samples): + # total += c*s + #print 'total', total + + assert left == 0 + #return Atom('stsc', size, None) + class stsc_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + write_uint(stream, self.body[0]) + write_uint(stream, len(self.body[1])) + for first_chunk, samples_per_chunk, sample_description_index in self.body[1]: + write_uint(stream, first_chunk) + write_uint(stream, samples_per_chunk) + write_uint(stream, sample_description_index) + def calsize(self): + self.size = 8 + 4 + 4 + len(self.body[1]) * 12 + return self.size + return stsc_atom(b'stsc', size, (value, chunks)) + +def read_stsz(stream, size, left, type): + value = read_full_atom(stream) + left -= 4 + + sample_size = read_uint(stream) + sample_count = read_uint(stream) + left -= 8 + + assert sample_size == 0 + total = 0 + sizes = [] + if sample_size == 0: + for i in range(sample_count): + entry_size = read_uint(stream) + sizes.append(entry_size) + total += entry_size + left -= 4 + + assert left == 0 + #return Atom('stsz', size, None) + class stsz_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + write_uint(stream, self.body[0]) + write_uint(stream, self.body[1]) + write_uint(stream, self.body[2]) + for entry_size in self.body[3]: + write_uint(stream, entry_size) + def calsize(self): + self.size = 8 + 4 + 8 + len(self.body[3]) * 4 + return self.size + return stsz_atom(b'stsz', size, (value, sample_size, sample_count, sizes)) + +def read_stco(stream, size, left, type): + value = read_full_atom(stream) + left -= 4 + + entry_count = read_uint(stream) + left -= 4 + + offsets = [] + for i in range(entry_count): + chunk_offset = read_uint(stream) + offsets.append(chunk_offset) + left -= 4 + + assert left == 0 + #return Atom('stco', size, None) + class stco_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + 
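+            # body = (version/flags word, [chunk offsets]); after the box
+            # header, write that word, the entry count, then one uint32 per
+            # chunk offset.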
write_uint(stream, self.body[0]) + write_uint(stream, len(self.body[1])) + for chunk_offset in self.body[1]: + write_uint(stream, chunk_offset) + def calsize(self): + self.size = 8 + 4 + 4 + len(self.body[1]) * 4 + return self.size + return stco_atom(b'stco', size, (value, offsets)) + +def read_ctts(stream, size, left, type): + value = read_full_atom(stream) + left -= 4 + + entry_count = read_uint(stream) + left -= 4 + + samples = [] + for i in range(entry_count): + sample_count = read_uint(stream) + sample_offset = read_uint(stream) + samples.append((sample_count, sample_offset)) + left -= 8 + + assert left == 0 + class ctts_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + write_uint(stream, self.body[0]) + write_uint(stream, len(self.body[1])) + for sample_count, sample_offset in self.body[1]: + write_uint(stream, sample_count) + write_uint(stream, sample_offset) + def calsize(self): + self.size = 8 + 4 + 4 + len(self.body[1]) * 8 + return self.size + return ctts_atom(b'ctts', size, (value, samples)) + +def read_smhd(stream, size, left, type): + body, stream = read_body_stream(stream, left) + value = read_full_atom(stream) + left -= 4 + + balance = read_ushort(stream) + assert stream.read(2) == b'\x00\x00' + left -= 4 + + assert left == 0 + return Atom(b'smhd', size, body) + +def read_mp4a(stream, size, left, type): + body, stream = read_body_stream(stream, left) + + assert stream.read(6) == b'\x00' * 6 + data_reference_index = read_ushort(stream) + assert stream.read(8) == b'\x00' * 8 + channel_count = read_ushort(stream) + sample_size = read_ushort(stream) + assert stream.read(4) == b'\x00' * 4 + time_scale = read_ushort(stream) + assert stream.read(2) == b'\x00' * 2 + left -= 28 + + atom = read_atom(stream) + assert atom.type == b'esds' + left -= atom.size + + assert left == 0 + return Atom(b'mp4a', size, body) + +def read_descriptor(stream): + tag = read_byte(stream) + raise NotImplementedError() + +def read_esds(stream, size, left, type): + value = read_uint(stream) + version = value >> 24 + assert version == 0 + flags = value & 0xffffff + left -= 4 + + body = stream.read(left) + return Atom(b'esds', size, None) + +def read_composite_atom(stream, size, left, type): + children = [] + while left > 0: + atom = read_atom(stream) + children.append(atom) + left -= atom.size + assert left == 0, left + return CompositeAtom(type, size, children) + +def read_mdat(stream, size, left, type): + source_start = stream.tell() + source_size = left + skip(stream, left) + #return Atom(type, size, None) + #raise NotImplementedError() + class mdat_atom(Atom): + def __init__(self, type, size, body): + Atom.__init__(self, type, size, body) + def write(self, stream): + self.write1(stream) + self.write2(stream) + def write2(self, stream): + source, source_start, source_size = self.body + original = source.tell() + source.seek(source_start) + copy_stream(source, stream, source_size) + def calsize(self): + return self.size + return mdat_atom(b'mdat', size, (stream, source_start, source_size)) + +atom_readers = { + b'mvhd': read_mvhd, # merge duration + b'tkhd': read_tkhd, # merge duration + b'mdhd': read_mdhd, # merge duration + b'hdlr': read_hdlr, # nothing + b'vmhd': read_vmhd, # nothing + b'stsd': read_stsd, # nothing + b'avc1': read_avc1, # nothing + b'avcC': read_avcC, # nothing + b'stts': read_stts, # sample_count, sample_duration + b'stss': read_stss, # join indexes + b'stsc': read_stsc, # merge # sample 
numbers + b'stsz': read_stsz, # merge # samples + b'stco': read_stco, # merge # chunk offsets + b'ctts': read_ctts, # merge + b'smhd': read_smhd, # nothing + b'mp4a': read_mp4a, # nothing + b'esds': read_esds, # noting + + b'ftyp': read_raw, + b'yqoo': read_raw, + b'moov': read_composite_atom, + b'trak': read_composite_atom, + b'mdia': read_composite_atom, + b'minf': read_composite_atom, + b'dinf': read_composite_atom, + b'stbl': read_composite_atom, + b'iods': read_raw, + b'dref': read_raw, + b'free': read_raw, + b'edts': read_raw, + b'pasp': read_raw, + + b'mdat': read_mdat, +} +#stsd sample descriptions (codec types, initialization etc.) +#stts (decoding) time-to-sample +#ctts (composition) time to sample +#stsc sample-to-chunk, partial data-offset information +#stsz sample sizes (framing) +#stz2 compact sample sizes (framing) +#stco chunk offset, partial data-offset information +#co64 64-bit chunk offset +#stss sync sample table (random access points) +#stsh shadow sync sample table +#padb sample padding bits +#stdp sample degradation priority +#sdtp independent and disposable samples +#sbgp sample-to-group +#sgpd sample group description +#subs sub-sample information + + +def read_atom(stream): + header = stream.read(8) + if not header: + return + assert len(header) == 8 + n = 0 + size = struct.unpack('>I', header[:4])[0] + assert size > 0 + n += 4 + type = header[4:8] + n += 4 + assert type != b'uuid' + if size == 1: + size = read_ulong(stream) + n += 8 + + left = size - n + if type in atom_readers: + return atom_readers[type](stream, size, left, type) + raise NotImplementedError('%s: %d' % (type, left)) + +def write_atom(stream, atom): + atom.write(stream) + +def parse_atoms(stream): + atoms = [] + while True: + atom = read_atom(stream) + if atom: + atoms.append(atom) + else: + break + return atoms + +def read_mp4(stream): + atoms = parse_atoms(stream) + moov = list(filter(lambda x: x.type == b'moov', atoms)) + mdat = list(filter(lambda x: x.type == b'mdat', atoms)) + assert len(moov) == 1 + assert len(mdat) == 1 + moov = moov[0] + mdat = mdat[0] + return atoms, moov, mdat + +################################################## +# merge +################################################## + +def merge_stts(samples_list): + sample_list = [] + for samples in samples_list: + assert len(samples) == 1 + sample_list.append(samples[0]) + counts, durations = zip(*sample_list) + assert len(set(durations)) == 1, 'not all durations equal' + return [(sum(counts), durations[0])] + +def merge_stss(samples, sample_number_list): + results = [] + start = 0 + for samples, sample_number_list in zip(samples, sample_number_list): + results.extend(map(lambda x: start + x, samples)) + start += sample_number_list + return results + +def merge_stsc(chunks_list, total_chunk_number_list): + results = [] + chunk_index = 1 + for chunks, total in zip(chunks_list, total_chunk_number_list): + for i in range(len(chunks)): + if i < len(chunks) - 1: + chunk_number = chunks[i + 1][0] - chunks[i][0] + else: + chunk_number = total + 1 - chunks[i][0] + sample_number = chunks[i][1] + description = chunks[i][2] + results.append((chunk_index, sample_number, description)) + chunk_index += chunk_number + return results + +def merge_stco(offsets_list, mdats): + offset = 0 + results = [] + for offsets, mdat in zip(offsets_list, mdats): + results.extend(offset + x - mdat.body[1] for x in offsets) + offset += mdat.size - 8 + return results + +def merge_stsz(sizes_list): + return sum(sizes_list, []) + +def merge_mdats(mdats): + 
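+    # Build one mdat box whose payload is every source payload back to back:
+    # a single 8-byte header plus the headerless (size - 8) bytes of each
+    # input. The actual data is copied later, when write2() streams it out.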
+def merge_mdats(mdats):
+    total_size = sum(x.size - 8 for x in mdats) + 8
+    class multi_mdat_atom(Atom):
+        def __init__(self, type, size, body):
+            Atom.__init__(self, type, size, body)
+        def write(self, stream):
+            self.write1(stream)
+            self.write2(stream)
+        def write2(self, stream):
+            for mdat in self.body:
+                mdat.write2(stream)
+        def calsize(self):
+            return self.size
+    return multi_mdat_atom(b'mdat', total_size, mdats)
+
+def merge_moov(moovs, mdats):
+    mvhd_duration = 0
+    for x in moovs:
+        mvhd_duration += x.get(b'mvhd').get('duration')
+    tkhd_durations = [0, 0]
+    mdhd_durations = [0, 0]
+    for x in moovs:
+        traks = x.get_all(b'trak')
+        assert len(traks) == 2
+        tkhd_durations[0] += traks[0].get(b'tkhd').get('duration')
+        tkhd_durations[1] += traks[1].get(b'tkhd').get('duration')
+        mdhd_durations[0] += traks[0].get(b'mdia', b'mdhd').get('duration')
+        mdhd_durations[1] += traks[1].get(b'mdia', b'mdhd').get('duration')
+
+    trak0s = [x.get_all(b'trak')[0] for x in moovs]
+    trak1s = [x.get_all(b'trak')[1] for x in moovs]
+
+    stts0 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak0s)
+    stts1 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak1s)
+
+    stss = merge_stss((x.get(b'mdia', b'minf', b'stbl', b'stss').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3]) for x in trak0s))
+
+    stsc0 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak0s))
+    stsc1 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak1s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak1s))
+
+    stco0 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak0s), mdats)
+    stco1 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak1s), mdats)
+
+    stsz0 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3] for x in trak0s))
+    stsz1 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3] for x in trak1s))
+
+    ctts = sum((x.get(b'mdia', b'minf', b'stbl', b'ctts').body[1] for x in trak0s), [])
+
+    moov = moovs[0]
+
+    moov.get(b'mvhd').set('duration', mvhd_duration)
+    trak0 = moov.get_all(b'trak')[0]
+    trak1 = moov.get_all(b'trak')[1]
+    trak0.get(b'tkhd').set('duration', tkhd_durations[0])
+    trak1.get(b'tkhd').set('duration', tkhd_durations[1])
+    trak0.get(b'mdia', b'mdhd').set('duration', mdhd_durations[0])
+    trak1.get(b'mdia', b'mdhd').set('duration', mdhd_durations[1])
+
+    stts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stts')
+    stts_atom.body = stts_atom.body[0], stts0
+    stts_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stts')
+    stts_atom.body = stts_atom.body[0], stts1
+
+    stss_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stss')
+    stss_atom.body = stss_atom.body[0], stss
+
+    stsc_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsc')
+    stsc_atom.body = stsc_atom.body[0], stsc0
+    stsc_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsc')
+    stsc_atom.body = stsc_atom.body[0], stsc1
+
+    stco_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco0
+    stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco1
+
+    stsz_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsz')
+    stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz0), stsz0
+    stsz_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsz')
+    stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz1), stsz1
+
+    ctts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'ctts')
+    ctts_atom.body = ctts_atom.body[0], ctts
+
+    old_moov_size = moov.size
+    new_moov_size = moov.calsize()
+    new_mdat_start = mdats[0].body[1] + new_moov_size - old_moov_size
+    stco0 = list(map(lambda x: x + new_mdat_start, stco0))
+    stco1 = list(map(lambda x: x + new_mdat_start, stco1))
+    stco_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco0
+    stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco1
+
+    return moov
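+
+# Why merge_moov() ends with a second stco pass: merging usually changes
+# the moov box's size, which shifts where the mdat payload lands in the
+# output file, so the already-merged chunk offsets are rebased once more.
+# With illustrative numbers (not from a real file): if the old moov was
+# 500 bytes, the new moov is 620, and the first part's payload started at
+# mdats[0].body[1] = 540, then new_mdat_start = 540 + 620 - 500 = 660,
+# and a merged-relative offset of 0 ends up at byte 660 of the output.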
+def merge_mp4s(files, output):
+    assert files
+    ins = [open(mp4, 'rb') for mp4 in files]
+    mp4s = list(map(read_mp4, ins))
+    moovs = list(map(lambda x: x[1], mp4s))
+    mdats = list(map(lambda x: x[2], mp4s))
+    moov = merge_moov(moovs, mdats)
+    mdat = merge_mdats(mdats)
+    with open(output, 'wb') as fp:
+        for x in mp4s[0][0]:
+            if x.type == b'moov':
+                moov.write(fp)
+            elif x.type == b'mdat':
+                mdat.write(fp)
+            else:
+                x.write(fp)
+
+##################################################
+# main
+##################################################
+
+# TODO: FIXME: duplicate of merge_flv
+
+def guess_output(inputs):
+    import os.path
+    inputs = list(map(os.path.basename, inputs))  # materialize: iterated more than once below
+    n = min(map(len, inputs))
+    for i in reversed(range(1, n)):
+        if len(set(s[:i] for s in inputs)) == 1:
+            return inputs[0][:i] + '.mp4'
+    return 'output.mp4'
+
+def concat_mp4s(mp4s, output = None):
+    assert mp4s, 'no mp4 file found'
+    import os.path
+    if not output:
+        output = guess_output(mp4s)
+    elif os.path.isdir(output):
+        output = os.path.join(output, guess_output(mp4s))
+
+    print('Merging video parts...')
+    merge_mp4s(mp4s, output)
+
+    return output
+
+def usage():
+    print('Usage: [python3] merge_mp4.py --output TARGET.mp4 mp4...')
+
+def main():
+    import sys, getopt
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="])
+    except getopt.GetoptError as err:
+        print(err)
+        usage()
+        sys.exit(1)
+    output = None
+    for o, a in opts:
+        if o in ("-h", "--help"):
+            usage()
+            sys.exit()
+        elif o in ("-o", "--output"):
+            output = a
+        else:
+            usage()
+            sys.exit(1)
+    if not args:
+        usage()
+        sys.exit(1)
+
+    concat_mp4s(args, output)
+
+if __name__ == '__main__':
+    main()
diff --git a/you-get b/you-get
new file mode 100755
index 00000000..76023f77
--- /dev/null
+++ b/you-get
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from get import *
+
+if __name__ == '__main__':
+    main('you-get', any_download, any_download_playlist)
diff --git a/you-get.json b/you-get.json
new file mode 100644
index 00000000..89ebc85d
--- /dev/null
+++ b/you-get.json
@@ -0,0 +1,19 @@
+{
+    "version": "0.0.1",
+    "date": "2012-08-20",
+    "author": "Mort Yao",
+    "file_list": [
+        "LICENSE",
+        "README.md",
+        "common.py",
+        "get.py",
+        "get_tudou.py",
+        "get_yinyuetai.py",
+        "get_youku.py",
+        "get_youtube.py",
+        "merge_flv.py",
+        "merge_mp4.py",
+        "you-get",
+        "you-get.json"
+    ]
+}