initial commit

This commit is contained in:
Mort Yao 2012-08-20 23:54:03 +08:00
commit 146bae2f97
13 changed files with 2228 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
_*
*.py[cod]
*.flv
*.mp4
*.webm

23
LICENSE Normal file
View File

@ -0,0 +1,23 @@
==============================================
This is a copy of the MIT license.
==============================================
Copyright (C) 2012 Mort Yao <mort.yao@gmail.com>
Copyright (C) 2012 Boyu Guo <iambus@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

98
README.md Normal file
View File

@ -0,0 +1,98 @@
# You-Get
一个Python 3的YouTube/优酷视频下载脚本。
### Python版本
Python 3.x
### 说明
基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/youku-lixian)用Python 3改写而成增加了以下功能
* 支持YouTube
* 支持断点续传
* 可设置HTTP代理
### 支持的站点(持续更新中)
目前根据本人需求,仅实现了对有限几个视频站点的支持,以后会继续增加(・∀・)
* YouTube <http://www.youtube.com>
* 音悦台 <http://www.yinyuetai.com>
* 优酷 <http://www.youku.com>
* 土豆 <http://www.tudou.com>
### 输出视频格式
* WebM (*.webm)
* MP4 (*.mp4)
* FLV (*.flv)
* 3GP (*.3gp)
对于YouTube程序将下载画质最高的[编码格式](http://en.wikipedia.org/wiki/Youtube#Quality_and_codecs)。
### 如何下载视频
以下命令均以Linux shell为例……Windows用户请自行脑补正确的命令格式
显示视频信息,但不进行下载(`-i`或`--info`选项):
$ ./you-get -i http://www.yinyuetai.com/video/463772
下载视频:
$ ./you-get http://www.yinyuetai.com/video/463772
下载多个视频:
$ ./you-get http://www.yinyuetai.com/video/463772 http://www.yinyuetai.com/video/471500
若当前目录下已有与视频标题同名的文件,下载时会自动跳过。若有同名的`.download`临时文件,程序会从上次中断处开始下载。
如要强制重新下载该视频，可使用`-f`（`--force`）选项：
$ ./you-get -f http://www.yinyuetai.com/video/463772
`-l``--playlist`)选项用于下载播放列表(只对某些网站适用):
$ ./you-get -l http://www.youku.com/playlist_show/id_5344313.html
指定视频文件的下载目录:
$ ./you-get -o ~/Downloads http://www.yinyuetai.com/video/463772
显示详细帮助:
$ ./you-get -h
### 如何设置代理
默认情况下Python自动使用系统的代理配置。可以通过环境变量`http_proxy`来设置系统的HTTP代理。
`-x``--http-proxy`选项用于手动指定You-Get所使用的HTTP代理。例如GoAgent的代理服务器是`http://127.0.0.1:8087`则使用该代理下载某YouTube视频的命令是
$ ./you-get -x 127.0.0.1:8087 http://www.youtube.com/watch?v=KbtO_Ayjw0M
Windows下的自由门等翻墙软件会自动设置系统全局代理因此无需指定HTTP代理即可下载YouTube视频
$ ./you-get http://www.youtube.com/watch?v=KbtO_Ayjw0M
如果不希望程序在下载过程中使用任何代理(包括系统的代理配置),可以显式地指定`--no-proxy`选项:
$ ./you-get --no-proxy http://v.youku.com/v_show/id_XMjI0ODc1NTc2.html
### 断点续传
下载未完成时意外中止(因为网络中断或程序被强行终止等),在目标路径中会有一个扩展名为`.download`的临时文件。
下次运行只要在目标路径中找到相应的`.download`临时文件,程序会自动从中断处继续下载。(除非指定了`-f`选项)
### 使用Python 2
优酷等国内视频网站的下载,请移步:[iambus/youku-lixian](https://github.com/iambus/youku-lixian)
YouTube等国外视频网站的下载请移步[rg3/youtube-dl](https://github.com/rg3/youtube-dl)
### 许可证
源码在MIT License下发布。

430
common.py Normal file
View File

@ -0,0 +1,430 @@
#!/usr/bin/env python3

import getopt
import json
import locale  # BUG FIX: used below but was never imported (NameError when stdout is piped)
import os
import re
import sys
from urllib import request, parse

# Project metadata; fall back to an empty version string when
# you-get.json is not present next to the script.
try:
    proj_info = json.loads(open('you-get.json').read())
except:
    proj_info = {'version': ''}

# Global flag set by the -f/--force command-line option.
force = False

# Console encoding used by tr() when printing titles/paths.
if sys.stdout.isatty():
    default_encoding = sys.stdout.encoding.lower()
else:
    default_encoding = locale.getpreferredencoding().lower()
def tr(s):
    """Return s unchanged on UTF-8/GBK-family consoles, otherwise its
    UTF-8 encoded bytes so printing cannot raise an encoding error."""
    passthrough = default_encoding.startswith('utf') or \
        default_encoding in ['cp936', '936', 'ms936', 'gbk']
    return s if passthrough else s.encode('utf-8')
def r1(pattern, text):
    """Return group 1 of the first match of pattern in text, or None."""
    match = re.search(pattern, text)
    return match.group(1) if match else None
def r1_of(patterns, text):
    """Return the first non-empty group-1 match among patterns, or None."""
    for pattern in patterns:
        found = r1(pattern, text)
        if found:
            return found
def escape_file_path(path):
    """Replace characters that are unsafe in file names with '-'."""
    for unsafe in '/\\*?':
        path = path.replace(unsafe, '-')
    return path
def unescape_html(html):
    """Decode HTML character entities (named and numeric) in *html*.

    Uses html.unescape(), which handles both &amp;-style named entities
    and &#NN; numeric references. The previous implementation relied on
    the undocumented HTMLParser.unescape(), removed in Python 3.9.
    """
    import html as html_module
    return html_module.unescape(html)
def ungzip(s):
    """Decompress gzip-encoded bytes and return the payload."""
    import gzip
    from io import BytesIO
    with gzip.GzipFile(fileobj=BytesIO(s)) as stream:
        return stream.read()
def undeflate(s):
    """Decompress raw-deflate bytes (no zlib header, as some HTTP
    servers send for Content-Encoding: deflate)."""
    import zlib
    decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
    return decompressor.decompress(s) + decompressor.flush()
def get_response(url):
    """Fetch url and return the response object with its (decompressed)
    body attached as response.data."""
    response = request.urlopen(url)
    data = response.read()
    # Transparently undo transfer compression announced by the server.
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
    elif response.info().get('Content-Encoding') == 'deflate':
        data = undeflate(data)
    response.data = data
    return response
def get_html(url, encoding = None):
    """Fetch url and decode its body to str.

    encoding: charset to decode with (defaults to UTF-8). Undecodable
    bytes are ignored rather than raising.

    BUG FIX: the encoding parameter was accepted but previously ignored;
    the body was always decoded as UTF-8.
    """
    content = get_response(url).data
    return str(content, encoding or 'utf-8', 'ignore')
def get_decoded_html(url):
    """Fetch url and decode the body using the charset from the
    Content-Type header; returns raw bytes when no charset is given."""
    response = get_response(url)
    data = response.data
    charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
    if charset:
        return data.decode(charset)
    else:
        # NOTE(review): callers receive bytes in this branch, str in the
        # other — they must cope with both.
        return data
def url_size(url):
    """Return the Content-Length of url, in bytes (issues a GET)."""
    size = int(request.urlopen(url).headers['content-length'])
    return size
def urls_size(urls):
    """Total Content-Length, in bytes, summed across all URLs."""
    return sum(url_size(u) for u in urls)
def url_info(url):
    """Probe url and return (mime_type, file_extension, size_in_bytes).

    Raises AssertionError for content types outside the known video set.
    """
    response = request.urlopen(request.Request(url))
    headers = response.headers
    type = headers['content-type']
    # MIME type -> output file extension.
    mapping = {
        'video/3gpp': '3gp',
        'video/f4v': 'flv',
        'video/mp4': 'mp4',
        'video/webm': 'webm',
        'video/x-flv': 'flv'
    }
    assert type in mapping, type
    ext = mapping[type]
    size = int(headers['content-length'])
    return type, ext, size
def url_save(url, filepath, bar, refer = None, is_part = False):
    """Download url to filepath with resume support.

    Partial data is kept in '<filepath>.download' and resumed via an
    HTTP Range request; on completion the temp file is renamed into place.

    bar:     progress-bar object updated as bytes arrive (may be None).
    refer:   optional Referer header value.
    is_part: True when this file is one segment of a multi-part video;
             suppresses the per-file skip/overwrite messages.
    """
    file_size = url_size(url)
    if os.path.exists(filepath):
        if not force and file_size == os.path.getsize(filepath):
            # Already fully downloaded and -f not given: skip.
            if not is_part:
                if bar:
                    bar.done()
                print('Skipping %s: file already exists' % tr(os.path.basename(filepath)))
            else:
                if bar:
                    bar.update_received(file_size)
            return
        else:
            if not is_part:
                if bar:
                    bar.done()
                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
    elif not os.path.exists(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))
    temp_filepath = filepath + '.download'
    received = 0
    if not force:
        open_mode = 'ab'  # resume: append to any existing temp file
        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'  # forced: restart from scratch
    if received < file_size:
        headers = {}
        if received:
            # Resume from where the previous run stopped.
            headers['Range'] = 'bytes=' + str(received) + '-'
        if refer:
            headers['Referer'] = refer
        response = request.urlopen(request.Request(url, headers = headers), None)
        assert file_size == received + int(response.headers['content-length'])
        with open(temp_filepath, open_mode) as output:
            while True:
                buffer = response.read(1024 * 256)
                if not buffer:
                    if received == file_size: # Download finished
                        break
                    else: # Unexpected termination. Retry request
                        headers['Range'] = 'bytes=' + str(received) + '-'
                        response = request.urlopen(request.Request(url, headers = headers), None)
                output.write(buffer)
                received += len(buffer)
                if bar:
                    bar.update_received(len(buffer))
    # BUG FIX: the message had three '%s' placeholders but only two
    # arguments, which raised TypeError instead of the intended message.
    assert received == os.path.getsize(temp_filepath), '%s == %s' % (received, os.path.getsize(temp_filepath))
    if os.access(filepath, os.W_OK):
        os.remove(filepath) # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)
class SimpleProgressBar:
    # Textual progress bar for downloads whose total size is known.
    def __init__(self, total_size, total_pieces = 1):
        self.displayed = False           # a bar line is currently on screen
        self.total_size = total_size     # expected total bytes
        self.total_pieces = total_pieces # number of video parts
        self.current_piece = 1
        self.received = 0                # bytes downloaded so far
    def update(self):
        # Redraw the bar in place (carriage return, no newline).
        self.displayed = True
        bar_size = 40
        percent = round(self.received * 100 / self.total_size, 1)
        if percent > 100:
            percent = 100
        dots = bar_size * int(percent) // 100
        # NOTE(review): 'plus' is an int here, so the > 0.8 / > 0.4 tests
        # behave as integer thresholds; the partial-cell glyph logic looks
        # as if it was written for a fractional value — confirm intent.
        plus = int(percent) - dots // bar_size * 100
        if plus > 0.8:
            plus = '='
        elif plus > 0.4:
            plus = '>'
        else:
            plus = ''
        bar = '=' * dots + plus
        bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces)
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()
    def update_received(self, n):
        # Account for n newly received bytes and redraw.
        self.received += n
        self.update()
    def update_piece(self, n):
        # Switch the piece counter to part n (1-based).
        self.current_piece = n
    def done(self):
        # Terminate the bar line with a newline, once.
        if self.displayed:
            print()
            self.displayed = False
class PiecesProgressBar:
    """Progress bar used when the total size is unknown: it can only
    show which piece of how many is being downloaded."""
    def __init__(self, total_size, total_pieces=1):
        self.displayed = False
        self.total_size = total_size
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0
    def update(self):
        # Redraw in place; percentage and fill are unknown, hence '?'.
        self.displayed = True
        line = '{0:>5}%[{1:<40}] {2}/{3}'.format(
            '?', '?' * 40, self.current_piece, self.total_pieces)
        sys.stdout.write('\r' + line)
        sys.stdout.flush()
    def update_received(self, n):
        self.received += n
        self.update()
    def update_piece(self, n):
        self.current_piece = n
    def done(self):
        if not self.displayed:
            return
        print()
        self.displayed = False
class DummyProgressBar:
    # Same interface as the real progress bars but displays nothing;
    # used when no progress output is wanted.
    def __init__(self, *args):
        pass
    def update_received(self, n):
        pass
    def update_piece(self, n):
        pass
    def done(self):
        pass
def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True):
    """Download one or more video-part URLs and optionally merge them.

    urls:       direct media URLs; one entry means a single output file.
    title:      video title, sanitized and used as the file name.
    ext:        container extension; must be 3gp/flv/mp4/webm.
    total_size: expected byte total; if falsy it is probed per URL
                (probe failures are printed and ignored).
    refer:      optional Referer header for the media requests.
    merge:      when True and there are multiple parts, flv/mp4 parts
                are concatenated and the part files removed.
    """
    assert urls
    assert ext in ('3gp', 'flv', 'mp4', 'webm')
    if not total_size:
        try:
            total_size = urls_size(urls)
        except:
            # Best effort: fall back to a pieces-only bar below.
            import traceback
            import sys
            traceback.print_exc(file = sys.stdout)
            pass
    title = escape_file_path(title)
    filename = '%s.%s' % (title, ext)
    filepath = os.path.join(output_dir, filename)
    if total_size:
        # A file at >= 90% of the expected size counts as already done
        # (why 0.9 exactly is not documented here — presumably reported
        # sizes are approximate).
        if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9:
            print('Skipping %s: file already exists' % tr(filepath))
            return
        bar = SimpleProgressBar(total_size, len(urls))
    else:
        bar = PiecesProgressBar(total_size, len(urls))
    if len(urls) == 1:
        url = urls[0]
        print('Downloading %s ...' % tr(filename))
        url_save(url, filepath, bar, refer = refer)
        bar.done()
    else:
        flvs = []
        print('Downloading %s.%s ...' % (tr(title), ext))
        for i, url in enumerate(urls):
            # Each part gets a [NN] suffix before the extension.
            filename = '%s[%02d].%s' % (title, i, ext)
            filepath = os.path.join(output_dir, filename)
            flvs.append(filepath)
            #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
            bar.update_piece(i + 1)
            url_save(url, filepath, bar, refer = refer, is_part = True)
        bar.done()
        if not merge:
            return
        if ext == 'flv':
            from merge_flv import concat_flvs
            concat_flvs(flvs, os.path.join(output_dir, title + '.flv'))
            for flv in flvs:
                os.remove(flv)
        elif ext == 'mp4':
            from merge_mp4 import concat_mp4s
            concat_mp4s(flvs, os.path.join(output_dir, title + '.mp4'))
            for flv in flvs:
                os.remove(flv)
        else:
            print("Can't merge %s files" % ext)
def playlist_not_supported(name):
    """Return a stub download_playlist callable that always raises
    NotImplementedError mentioning the site *name*."""
    def stub(*args, **kwargs):
        raise NotImplementedError('Playlist is not supported for ' + name)
    return stub
def print_info(site_info, title, type, size):
    """Print a human-readable summary: site, title, MIME type and size
    (in MB and bytes)."""
    # Normalize bare extensions to their MIME types first.
    ext_to_mime = {
        '3gp': 'video/3gpp',
        'flv': 'video/x-flv',
        'f4v': 'video/x-flv',
        'mp4': 'video/mp4',
        'webm': 'video/webm',
    }
    type = ext_to_mime.get(type, type)
    mime_to_desc = {
        'video/3gpp': '3GPP multimedia file',
        'video/x-flv': 'Flash video',
        'video/f4v': 'Flash video',
        'video/mp4': 'MPEG-4 video',
        'video/x-m4v': 'MPEG-4 video',
        'video/webm': 'WebM video',
        #'video/ogg': 'Ogg video',
        #'video/quicktime': 'QuickTime video',
        #'video/x-matroska': 'Matroska video',
        #'video/x-ms-wmv': 'Windows Media video',
        #'video/mpeg': 'MPEG video',
    }
    type_info = "%s (%s)" % (mime_to_desc.get(type, 'Unknown type'), type)
    print("Video Site:", site_info)
    print("Title: ", tr(title))
    print("Type: ", type_info)
    print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)")
def set_http_proxy(proxy):
    """Install a global urllib opener according to *proxy*.

    None  -> use the system proxy configuration;
    ''    -> use no proxy at all;
    other -> use that HTTP proxy ('http://' prefixed if missing).
    """
    if proxy is None:
        handler = request.ProxyHandler()    # system default settings
    elif proxy == '':
        handler = request.ProxyHandler({})  # explicitly proxy-less
    else:
        if not proxy.startswith('http://'):
            proxy = 'http://' + proxy
        handler = request.ProxyHandler({'http': '%s' % proxy})
    request.install_opener(request.build_opener(handler))
def main(script_name, download, download_playlist = None):
    """Shared command-line entry point used by every site script.

    script_name:       name shown in the usage string.
    download:          callable(url, output_dir=, merge=, info_only=).
    download_playlist: same signature; enables -l/--playlist when given.
    """
    version = 'You-Get %s, a video downloader.' % proj_info['version']
    help = 'Usage: [python3] %s [OPTION]... [URL]...\n' % script_name
    help += '''\nStartup options:
    -V | --version                           Display the version and exit.
    -h | --help                              Print this help and exit.
    '''
    help += '''\nDownload options (use with URLs):
    -f | --force                             Force overwriting existed files.
    -i | --info                              Display the information of videos without downloading.
    -l | --playlist                          Download playlists. (only available for some sites)
    -n | --no-merge                          Don't merge video parts.
    -o | --output-dir <PATH>                 Set the output directory for downloaded videos.
    -x | --http-proxy <PROXY-SERVER-IP:PORT> Use specific HTTP proxy for downloading.
         --no-proxy                          Don't use any proxy. (ignore $http_proxy)
    '''
    short_opts = 'Vhfino:x:'
    opts = ['version', 'help', 'force', 'info', 'no-merge', 'no-proxy', 'output-dir=', 'http-proxy=']
    if download_playlist:
        # -l/--playlist is only offered when the site supports playlists.
        short_opts = 'l' + short_opts
        opts = ['playlist'] + opts
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, opts)
    except getopt.GetoptError as err:
        print(err)
        print(help)
        sys.exit(2)
    info_only = False
    playlist = False
    merge = True
    output_dir = '.'
    proxy = None
    for o, a in opts:
        if o in ('-V', '--version'):
            print(version)
            sys.exit()
        elif o in ('-h', '--help'):
            print(version)
            print(help)
            sys.exit()
        elif o in ('-f', '--force'):
            # force is module-global: url_save/download_urls consult it.
            global force
            force = True
        elif o in ('-i', '--info'):
            info_only = True
        elif o in ('-l', '--playlist'):
            playlist = True
        elif o in ('-n', '--no-merge'):
            merge = False
        elif o in ('--no-proxy'):
            # NOTE(review): ('--no-proxy') is a string, not a tuple; the
            # 'in' test still matches this exact flag via substring match.
            proxy = ''
        elif o in ('-o', '--output-dir'):
            output_dir = a
        elif o in ('-x', '--http-proxy'):
            proxy = a
        else:
            print(help)
            sys.exit(1)
    if not args:
        print(help)
        sys.exit(1)
    set_http_proxy(proxy)
    for url in args:
        # Be forgiving about a missing scheme on the command line.
        if not url.startswith('http://'):
            url = 'http://' + url
        if playlist:
            download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only)
        else:
            download(url, output_dir = output_dir, merge = merge, info_only = info_only)

51
get.py Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
from common import *
import get_tudou
import get_yinyuetai
import get_youku
import get_youtube
def url_to_module(url):
    """Map a video page URL to its site-specific downloader module.

    Raises NotImplementedError for sites without a handler yet.
    """
    site = r1(r'http://([^/]+)/', url)
    assert site, 'invalid url: ' + url
    # Fold *.com.cn hosts into *.com so the domain extraction works.
    if site.endswith('.com.cn'):
        site = site[:-3]
    domain = r1(r'(\.[^.]+\.[^.]+)$', site)
    assert domain, 'unsupported url: ' + url
    key = r1(r'([^.]+)', domain)
    downloads = {
        'youtube': get_youtube,
        'youku': get_youku,
        'yinyuetai': get_yinyuetai,
        'tudou': get_tudou,
        #TODO:
        # 'acfun': get_acfun,
        # 'bilibili': get_bilibili,
        # 'kankanews': get_bilibili,
        # 'iask': get_iask,
        # 'sina': get_iask,
        # 'ku6': get_ku6,
        # 'pptv': get_pptv,
        # 'iqiyi': get_iqiyi,
        # 'sohu': get_sohu,
        # '56': get_w56,
        # 'cntv': get_cntv,
    }
    if key not in downloads:
        raise NotImplementedError(url)
    return downloads[key]
def any_download(url, output_dir = '.', merge = True, info_only = False):
    """Download a single video, dispatching on the URL's site."""
    handler = url_to_module(url)
    handler.download(url, output_dir = output_dir, merge = merge, info_only = info_only)
def any_download_playlist(url, output_dir = '.', merge = True, info_only = False):
    """Download a playlist, dispatching on the URL's site."""
    handler = url_to_module(url)
    handler.download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only)
# Standalone entry point: generic downloader with playlist support.
if __name__ == '__main__':
    main('get.py', any_download, any_download_playlist)

78
get_tudou.py Executable file
View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
__all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid']
from common import *
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
    """Download a Tudou video by its internal item id (iid).

    Queries the v2.tudou.com XML API, picks the variant with the highest
    'brt' attribute (presumably bitrate — confirm), and downloads it.
    """
    xml = get_html('http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99')
    from xml.dom.minidom import parseString
    doc = parseString(xml)
    # Prefer the caller-supplied title; fall back to the XML root's
    # 'tt'/'title' attributes.
    title = title or doc.firstChild.getAttribute('tt') or doc.firstChild.getAttribute('title')
    urls = [(int(n.getAttribute('brt')), n.firstChild.nodeValue.strip()) for n in doc.getElementsByTagName('f')]
    url = max(urls, key = lambda x:x[0])[1]
    assert 'f4v' in url
    type, ext, size = url_info(url)
    print_info(site_info, title, type, size)
    if not info_only:
        #url_save(url, filepath, bar):
        # total_size=None: download_urls re-probes the size itself.
        download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
    """Resolve a tudou.com/programs/view/<id>/ page to its iid and download.

    CONSISTENCY FIX: the info_only flag (present on every sibling helper)
    was missing here, so -i/--info could not be honored through this
    path. The new parameter defaults to False, keeping old calls valid.
    """
    html = get_html('http://www.tudou.com/programs/view/%s/' % id)
    iid = r1(r'iid\s*=\s*(\S+)', html)
    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
def tudou_download(url, output_dir = '.', merge = True, info_only = False):
    """Download from a Tudou page URL by scraping its iid and title."""
    html = get_decoded_html(url)
    iid = r1(r'iid\s*[:=]\s*(\d+)', html)
    assert iid
    # 'kw' holds the page's keyword/title string.
    title = r1(r'kw\s*[:=]\s*"([^"]+)"', html)
    assert title
    title = unescape_html(title)
    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
def parse_playlist(url):
    # NOTE(review): dead code — this definition is immediately shadowed
    # by the second `parse_playlist` below and can never be called.
    #if r1('http://www.tudou.com/playlist/p/a(\d+)\.html', url):
    #    html = get_html(url)
    #    print re.search(r'<script>var.*?</script>', html, flags=re.S).group()
    #else:
    #    raise NotImplementedError(url)
    raise NotImplementedError()
def parse_playlist(url):
    """Resolve a Tudou playlist/album URL to [(title, itemId), ...].

    Handles both /playlist/p/aNNN(.html) and /albumcover/ pages; the
    album id (aid) is taken from the URL when present, otherwise scraped
    from the page source.
    """
    aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
    html = get_decoded_html(url)
    if not aid:
        aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)
    if re.match(r'http://www.tudou.com/albumcover/', url):
        atitle = r1(r"title\s*:\s*'([^']+)'", html)
    elif re.match(r'http://www.tudou.com/playlist/p/', url):
        atitle = r1(r'atitle\s*=\s*"([^"]+)"', html)
    else:
        raise NotImplementedError(url)
    assert aid
    assert atitle
    import json
    #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid
    url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
    # Each entry is titled '<album title>-<item title>'.
    return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]
def tudou_download_playlist(url, create_dir = False, output_dir = '.', merge = True, info_only = False):
    """Download every video of a Tudou playlist/album.

    BUG FIX: common.main() invokes download_playlist with an info_only
    keyword argument, which this function did not accept — every
    `-l` run would die with a TypeError. The parameter is added with a
    backward-compatible default and forwarded to tudou_download_by_iid.
    """
    if create_dir:
        raise NotImplementedError('please report a bug so I can implement this')
    videos = parse_playlist(url)
    for i, (title, id) in enumerate(videos):
        print('Downloading %s of %s videos...' % (i + 1, len(videos)))
        tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
# Module-level hooks consumed by the generic dispatcher in get.py.
site_info = "Tudou.com"
download = tudou_download
download_playlist = tudou_download_playlist

# Standalone usage: python3 get_tudou.py <url>...
if __name__ == '__main__':
    main('tudou', tudou_download, tudou_download_playlist)

36
get_yinyuetai.py Executable file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env python3
__all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']
from common import *
def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    """Download a YinYueTai MV by its numeric video id; a title must be
    supplied by the caller (used for the output file name)."""
    assert title
    html = get_html('http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id)
    #TODO: run a fully parse instead of text search
    # url = r1(r'(http://flv.yinyuetai.com/uploads/videos/common/\w+\.flv\?t=[a-f0-9]{16})', html)
    # url = r1(r'http://hc.yinyuetai.com/uploads/videos/common/[A-F0-9]{32}\.mp4\?v=\d{12}', html)
    # Matches both the flv (?t=...) and mp4 (?v=...) media URL forms.
    url = r1(r'(http://\w+\.yinyuetai\.com/uploads/videos/common/\w+\.(?:flv|mp4)\?(?:t=[a-f0-9]{16}|v=\d{12}))', html)
    assert url
    type, ext, size = url_info(url)
    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)
def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
    """Download from a yinyuetai.com/video/<id> page URL."""
    id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url)
    assert id
    html = get_html(url, 'utf-8')
    # The og:title meta tag carries the MV title.
    title = r1(r'<meta property="og:title" content="([^"]+)"/>', html)
    assert title
    title = parse.unquote(title)
    title = escape_file_path(title)
    yinyuetai_download_by_id(id, title, output_dir, merge = merge, info_only = info_only)
# Module-level hooks consumed by the generic dispatcher in get.py.
site_info = "YinYueTai.com"
download = yinyuetai_download
download_playlist = playlist_not_supported('yinyuetai')

# Standalone usage: python3 get_yinyuetai.py <url>...
if __name__ == '__main__':
    main('get_yinyuetai.py', yinyuetai_download)

173
get_youku.py Executable file
View File

@ -0,0 +1,173 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__all__ = ['youku_download', 'youku_download_playlist', 'youku_download_by_id']
from common import *
import json
from random import randint
from time import time
import re
import sys
def trim_title(title):
    """Strip Youku site-name boilerplate suffixes from a page title."""
    for suffix in (' - 视频 - 优酷视频 - 在线观看', ' - 专辑 - 优酷视频'):
        title = title.replace(suffix, '')
    # Third form wraps site text around dashes; drop it entirely.
    return re.sub(r'—([^—]+)—优酷网,视频高清在线观看', '', title)
def find_video_id_from_url(url):
    """Extract the Youku video id from any known URL shape: v_show
    page, player swf, loader parameter, or a bare id string."""
    candidates = [r'^http://v.youku.com/v_show/id_([\w=]+).html',
                  r'^http://player.youku.com/player.php/sid/([\w=]+)/v.swf',
                  r'^loader\.swf\?VideoIDS=([\w=]+)',
                  r'^([\w=]+)$']
    return r1_of(candidates, url)
def find_video_id_from_show_page(url):
    # Follow the "play" button anchor on a show page to the video URL.
    return re.search(r'<div class="btnplay">.*href="([^"]+)"', get_html(url)).group(1)
def youku_url(url):
    """Canonicalize any accepted Youku URL form to a playable page URL.

    Raises Exception for URLs that match none of the known shapes.
    """
    id = find_video_id_from_url(url)
    if id:
        return 'http://v.youku.com/v_show/id_%s.html' % id
    if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
        # Show pages only link to the real video page; fetch and follow.
        return find_video_id_from_show_page(url)
    if re.match(r'http://v.youku.com/v_playlist/\w+.html', url):
        return url
    raise Exception('Invalid Youku URL: '+url)
def parse_page(url):
    """Fetch a Youku video page and return (videoId2, title, subtitle).

    subtitle is None when absent or identical to the title.
    """
    url = youku_url(url)
    page = get_html(url)
    id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1)
    if re.search(r'v_playlist', url):
        # if we are playing a video from playlist, the meta title might be incorrect
        title = re.search(r'<title>([^<>]*)</title>', page).group(1)
    else:
        title = re.search(r'<meta name="title" content="([^"]*)">', page).group(1)
    title = trim_title(title)
    if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
        title = re.sub(r'^[^-]+-\s*', '', title) # remove the special name from title for playlist video
    title = unescape_html(title)
    subtitle = re.search(r'<span class="subtitle" id="subtitle">([^<>]*)</span>', page)
    if subtitle:
        subtitle = subtitle.group(1).strip()
        if subtitle == title:
            # Redundant subtitle adds nothing; drop it.
            subtitle = None
    return id2, title, subtitle
def get_info(videoId2):
    # Fetch and decode the getPlayList JSON blob for a videoId2.
    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
def find_video(info, stream_type = None):
    """From a getPlayList JSON blob, build [(url, size), ...] covering
    every segment of the chosen stream type.

    stream_type defaults to the best available of hd2 > mp4 > flv.
    """
    #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5)
    segs = info['data'][0]['segs']
    types = segs.keys()
    if not stream_type:
        for x in ['hd2', 'mp4', 'flv']:
            if x in types:
                stream_type = x
                break
        else:
            raise NotImplementedError()
    assert stream_type in ('hd2', 'mp4', 'flv')
    # hd2 streams are delivered in flv containers.
    file_type = {'hd2': 'flv', 'mp4': 'mp4', 'flv': 'flv'}[stream_type]
    # De-scramble the file id: a keyed shuffle of this alphabet, driven
    # by the server-provided 'seed', yields the substitution table.
    seed = info['data'][0]['seed']
    source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890")
    mixed = ''
    while source:
        seed = (seed * 211 + 30031) & 0xFFFF
        index = seed * len(source) >> 16
        c = source.pop(index)
        mixed += c
    # streamfileids are '*'-separated indexes into the shuffled alphabet.
    ids = info['data'][0]['streamfileids'][stream_type].split('*')[:-1]
    vid = ''.join(mixed[int(i)] for i in ids)
    # Session id: millisecond timestamp plus two random blocks.
    sid = '%s%s%s' % (int(time() * 1000), randint(1000, 1999), randint(1000, 9999))
    urls = []
    for s in segs[stream_type]:
        no = '%02x' % int(s['no'])
        url = 'http://f.youku.com/player/getFlvPath/sid/%s_%s/st/%s/fileid/%s%s%s?K=%s&ts=%s' % (sid, no, file_type, vid[:8], no.upper(), vid[10:], s['k'], s['seconds'])
        urls.append((url, int(s['size'])))
    return urls
def file_type_of_url(url):
    """Extract the '/st/<type>/' path segment (flv/mp4/...) from a
    Youku media URL."""
    match = re.search(r'/st/([^/]+)/', url)
    return str(match.group(1))
def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
    """Download all segments of a Youku video given its videoId2."""
    info = get_info(id2)
    # find_video yields (url, size) pairs; split into parallel tuples.
    urls, sizes = zip(*find_video(info, stream_type))
    total_size = sum(sizes)
    print_info(site_info, title, file_type_of_url(urls[0]), total_size)
    if not info_only:
        download_urls(urls, title, file_type_of_url(urls[0]), total_size, output_dir, merge = merge)
def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
    """Download a single Youku video from any accepted URL form.

    BUG FIX: the stream_type argument was accepted but silently dropped;
    it is now forwarded to youku_download_by_id, which already takes it.
    """
    id2, title, subtitle = parse_page(url)
    if subtitle:
        title += '-' + subtitle
    youku_download_by_id(id2, title, output_dir, stream_type = stream_type, merge = merge, info_only = info_only)
def parse_playlist_videos(html):
    """Return the video ids referenced by id="A_<id>" anchors in a
    playlist page, in document order."""
    anchor_id = re.compile(r'id="A_(\w+)"')
    return anchor_id.findall(html)
def parse_playlist_pages(html):
    """Return the URLs of playlist pages 2..N found in the pager <ul>.

    Page 1 is the page already in hand; a missing pager means a
    single-page playlist and yields [].
    """
    pager = re.search(r'<ul class="pages">.*?</ul>', html, flags = re.S)
    if not pager:
        return []
    links = re.findall(r'href="([^"]+)"', pager.group())
    # The last pager link encodes the highest page number.
    prefix, last_page, suffix = re.match(r'^(.*page_)(\d+)(_.*)$', links[-1]).groups()
    return ['http://v.youku.com%s%s%s?__rt=1&__ro=listShow' % (prefix, page, suffix)
            for page in range(2, int(last_page) + 1)]
def parse_playlist(url):
    """Collect the video ids of a Youku official playlist, walking every
    pager page of the list view."""
    html = get_html(url)
    video_id = re.search(r"var\s+videoId\s*=\s*'(\d+)'", html).group(1)
    show_id = re.search(r'var\s+showid\s*=\s*"(\d+)"', html).group(1)
    list_url = 'http://v.youku.com/v_vpofficiallist/page_1_showid_%s_id_%s.html?__rt=1&__ro=listShow' % (show_id, video_id)
    html = get_html(list_url)
    ids = parse_playlist_videos(html)
    # Append ids from pages 2..N, if any.
    for url in parse_playlist_pages(html):
        ids.extend(parse_playlist_videos(get_html(url)))
    return ids
def parse_vplaylist(url):
    """Expand a Youku v_playlist-style URL into one playlist-page URL
    per contained video."""
    id = r1_of([r'^http://www.youku.com/playlist_show/id_(\d+)(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html',
                r'^http://v.youku.com/v_playlist/f(\d+)o[01]p\d+.html',
                r'^http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html'],
               url)
    assert id, 'not valid vplaylist url: ' + url
    url = 'http://www.youku.com/playlist_show/id_%s.html' % id
    # <span class="num"> on the playlist page holds the video count.
    n = int(re.search(r'<span class="num">(\d+)</span>', get_html(url)).group(1))
    return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)]
def youku_download_playlist(url, output_dir = '.', merge = True, info_only = False):
    """Download every video of a Youku playlist, whichever of the
    supported playlist URL shapes is given."""
    if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
        # Show pages redirect to the underlying video/playlist page.
        url = find_video_id_from_show_page(url)
    if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url):
        ids = parse_vplaylist(url)
    elif re.match(r'http://v.youku.com/v_playlist/f\d+o[01]p\d+.html', url):
        ids = parse_vplaylist(url)
    elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url):
        ids = parse_vplaylist(url)
    else:
        assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
        ids = parse_playlist(url)
    for i, id in enumerate(ids):
        print('Processing %s of %s videos...' % (i + 1, len(ids)))
        youku_download(id, output_dir, merge = merge, info_only = info_only)
# Module-level hooks consumed by the generic dispatcher in get.py.
site_info = "Youku.com"
download = youku_download
download_playlist = youku_download_playlist

# Standalone usage: python3 get_youku.py <url>...
if __name__ == '__main__':
    main('get_youku.py', youku_download, youku_download_playlist)

36
get_youtube.py Executable file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env python3
__all__ = ['youtube_download', 'youtube_download_by_id']
from common import *
def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    """Download a YouTube video by its watch id.

    Takes the first entry of url_encoded_fmt_stream_map, trying the
    get_video_info endpoint first and falling back to the watch page.
    The [4:] slice drops the first four characters of the entry —
    presumably a leading 'url=' key; confirm against the API response.
    """
    try:
        url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/get_video_info?&video_id=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:]
    except:
        # get_video_info fails for some videos; scrape the page instead.
        url = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')))['url_encoded_fmt_stream_map'][0][4:]
    type, ext, size = url_info(url)
    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)
def youtube_download(url, output_dir = '.', merge = True, info_only = False):
    """Download from a YouTube watch URL (the ?v= query parameter)."""
    id = parse.parse_qs(parse.urlparse(url).query)['v'][0]
    assert id
    try:
        title = parse.parse_qs(parse.unquote(request.urlopen('http://www.youtube.com/get_video_info?&video_id=' + id).read().decode('utf-8')))['title'][0]
    except:
        # Fallback: scrape the watch page's JSON title field.
        html = get_html(url, 'utf-8')
        title = r1(r'"title": "([^"]+)"', html)
    assert title
    title = parse.unquote(title)
    title = escape_file_path(title)
    youtube_download_by_id(id, title, output_dir, merge = merge, info_only = info_only)
# Module-level hooks consumed by the generic dispatcher in get.py.
site_info = "YouTube.com"
download = youtube_download
download_playlist = playlist_not_supported('youtube')

# Standalone usage: python3 get_youtube.py <url>...
if __name__ == '__main__':
    main('get_youtube.py', youtube_download)

365
merge_flv.py Executable file
View File

@ -0,0 +1,365 @@
#!/usr/bin/env python3
import struct
from io import BytesIO
TAG_TYPE_METADATA = 18
##################################################
# AMF0
##################################################
AMF_TYPE_NUMBER = 0x00
AMF_TYPE_BOOLEAN = 0x01
AMF_TYPE_STRING = 0x02
AMF_TYPE_OBJECT = 0x03
AMF_TYPE_MOVIECLIP = 0x04
AMF_TYPE_NULL = 0x05
AMF_TYPE_UNDEFINED = 0x06
AMF_TYPE_REFERENCE = 0x07
AMF_TYPE_MIXED_ARRAY = 0x08
AMF_TYPE_END_OF_OBJECT = 0x09
AMF_TYPE_ARRAY = 0x0A
AMF_TYPE_DATE = 0x0B
AMF_TYPE_LONG_STRING = 0x0C
AMF_TYPE_UNSUPPORTED = 0x0D
AMF_TYPE_RECORDSET = 0x0E
AMF_TYPE_XML = 0x0F
AMF_TYPE_CLASS_OBJECT = 0x10
AMF_TYPE_AMF3_OBJECT = 0x11
class ECMAObject:
    """An AMF0 ECMA ("mixed") array as found in FLV metadata tags:
    insertion-ordered (key, value) pairs plus a dict for fast lookup."""
    def __init__(self, max_number):
        self.max_number = max_number  # declared element count from the stream
        self.data = []                # ordered (key, value) pairs
        self.map = {}                 # key -> value mirror for O(1) get()
    def put(self, k, v):
        """Append a new key/value pair."""
        self.data.append((k, v))
        self.map[k] = v
    def get(self, k):
        """Return the value for key k; raises KeyError if absent."""
        return self.map[k]
    def set(self, k, v):
        """Replace the value of an existing key k; raises KeyError if absent."""
        for index, (key, _) in enumerate(self.data):
            if key == k:
                self.data[index] = (k, v)
                break
        else:
            raise KeyError(k)
        self.map[k] = v
    def keys(self):
        return self.map.keys()
    def __str__(self):
        return 'ECMAObject<' + repr(self.map) + '>'
    def __eq__(self, other):
        # Equality compares declared size and the ordered pairs.
        return self.max_number == other.max_number and self.data == other.data
def read_amf_number(stream):
    """Read an AMF0 number payload: a big-endian IEEE-754 double."""
    (value,) = struct.unpack('>d', stream.read(8))
    return value
def read_amf_boolean(stream):
    """Read an AMF0 boolean payload (one byte, required to be 0 or 1)."""
    flag = read_byte(stream)
    assert flag in (0, 1)
    return flag == 1
def read_amf_string(stream):
    """Read an AMF0 string payload: big-endian u16 length, then UTF-8.

    Returns None at a truncated end of stream (dirty fix kept for
    invalid Qiyi flv files).
    """
    length_bytes = stream.read(2)
    if length_bytes == b'':
        return None
    (length,) = struct.unpack('>H', length_bytes)
    payload = stream.read(length)
    assert len(payload) == length
    return payload.decode('utf-8')
def read_amf_object(stream):
    """Read an AMF0 object: (string key, value) pairs until the empty
    key followed by the end-of-object marker."""
    obj = {}
    while True:
        k = read_amf_string(stream)
        if not k:
            # Empty key terminates the object; consume the end marker.
            assert read_byte(stream) == AMF_TYPE_END_OF_OBJECT
            break
        v = read_amf(stream)
        obj[k] = v
    return obj
def read_amf_mixed_array(stream):
    """Read an AMF0 ECMA array: a u32 declared count, then key/value
    pairs terminated like an object; returns an ECMAObject."""
    max_number = read_uint(stream)
    mixed_results = ECMAObject(max_number)
    while True:
        k = read_amf_string(stream)
        if k is None:
            # dirty fix for the invalid Qiyi flv
            break
        if not k:
            # Empty key terminates the array; consume the end marker.
            assert read_byte(stream) == AMF_TYPE_END_OF_OBJECT
            break
        v = read_amf(stream)
        mixed_results.put(k, v)
    assert len(mixed_results.data) == max_number
    return mixed_results
def read_amf_array(stream):
    """Read an AMF0 strict array: a u32 count, then that many values."""
    count = read_uint(stream)
    return [read_amf(stream) for _ in range(count)]
# Dispatch table: AMF0 type tag -> reader for that type's payload.
amf_readers = {
    AMF_TYPE_NUMBER: read_amf_number,
    AMF_TYPE_BOOLEAN: read_amf_boolean,
    AMF_TYPE_STRING: read_amf_string,
    AMF_TYPE_OBJECT: read_amf_object,
    AMF_TYPE_MIXED_ARRAY: read_amf_mixed_array,
    AMF_TYPE_ARRAY: read_amf_array,
}

def read_amf(stream):
    # One AMF0 value = a type-tag byte followed by its payload.
    return amf_readers[read_byte(stream)](stream)
def write_amf_number(stream, v):
    """Write an AMF0 number payload: a big-endian IEEE-754 double."""
    encoded = struct.pack('>d', v)
    stream.write(encoded)
def write_amf_boolean(stream, v):
    """Write an AMF0 boolean payload: one byte, 1 for truthy v else 0."""
    stream.write(b'\x01' if v else b'\x00')
def write_amf_string(stream, s):
    """Write an AMF0 string payload: u16 byte length, then UTF-8 bytes."""
    encoded = s.encode('utf-8')
    stream.write(struct.pack('>H', len(encoded)) + encoded)
def write_amf_object(stream, o):
    """Write dict o as an AMF0 object, terminated by the empty key plus
    the end-of-object marker."""
    for k in o:
        write_amf_string(stream, k)
        write_amf(stream, o[k])
    write_amf_string(stream, '')
    write_byte(stream, AMF_TYPE_END_OF_OBJECT)
def write_amf_mixed_array(stream, o):
    """Write an ECMAObject o as an AMF0 ECMA array: u32 declared count,
    each pair in insertion order, then the object terminator."""
    write_uint(stream, o.max_number)
    for k, v in o.data:
        write_amf_string(stream, k)
        write_amf(stream, v)
    write_amf_string(stream, '')
    write_byte(stream, AMF_TYPE_END_OF_OBJECT)
def write_amf_array(stream, o):
    """Write an AMF0 strict array payload: u32 count, then each value."""
    write_uint(stream, len(o))
    for item in o:
        write_amf(stream, item)
# Python type -> AMF0 type tag used when serializing a value.
amf_writers_tags = {
    float: AMF_TYPE_NUMBER,
    bool: AMF_TYPE_BOOLEAN,
    str: AMF_TYPE_STRING,
    dict: AMF_TYPE_OBJECT,
    ECMAObject: AMF_TYPE_MIXED_ARRAY,
    list: AMF_TYPE_ARRAY,
}

# AMF0 type tag -> writer for that type's payload.
amf_writers = {
    AMF_TYPE_NUMBER: write_amf_number,
    AMF_TYPE_BOOLEAN: write_amf_boolean,
    AMF_TYPE_STRING: write_amf_string,
    AMF_TYPE_OBJECT: write_amf_object,
    AMF_TYPE_MIXED_ARRAY: write_amf_mixed_array,
    AMF_TYPE_ARRAY: write_amf_array,
}
def write_amf(stream, v):
    """Write one AMF0 value: its type-tag byte, then the payload.

    ECMAObject is special-cased via isinstance so subclasses are still
    serialized as mixed arrays; other types are looked up exactly.
    """
    if isinstance(v, ECMAObject):
        tag = amf_writers_tags[ECMAObject]
    else:
        tag = amf_writers_tags[type(v)]
    write_byte(stream, tag)
    amf_writers[tag](stream, v)
##################################################
# FLV
##################################################
def read_int(stream):
    """Read a big-endian signed 32-bit integer."""
    (value,) = struct.unpack('>i', stream.read(4))
    return value
def read_uint(stream):
    """Read a big-endian unsigned 32-bit integer."""
    (value,) = struct.unpack('>I', stream.read(4))
    return value
def write_uint(stream, n):
    """Write n as a big-endian unsigned 32-bit integer."""
    stream.write(n.to_bytes(4, 'big'))
def read_byte(stream):
    """Read one byte and return it as an int in 0-255."""
    return stream.read(1)[0]
def write_byte(stream, b):
    """Write a single byte given as an int in 0-255."""
    stream.write(struct.pack('B', b))
def read_unsigned_medium_int(stream):
    """Read a big-endian unsigned 24-bit integer."""
    hi, mid, lo = struct.unpack('BBB', stream.read(3))
    return (hi << 16) | (mid << 8) | lo
def read_tag(stream):
    """Read one FLV tag.

    Header layout (15 bytes total): previous tag size (uint32), tag type
    (byte), body size (uint24), timestamp (uint24 + 1 extended high byte),
    stream id (3 zero bytes); the body follows.

    Returns (data_type, timestamp, body_size, body, previous_tag_size), or
    None at end of stream. Fix: an FLV normally ends with a trailing 4-byte
    previous-tag-size, but the original only treated a short read of
    exactly 4 bytes as EOF and crashed in struct.unpack on any other
    truncation; any read shorter than 15 bytes now means EOF.
    """
    header = stream.read(15)
    if len(header) < 15:
        # normally exactly 4 bytes remain (the file's final previous-tag-size)
        return None
    x = struct.unpack('>IBBBBBBBBBBB', header)
    previous_tag_size = x[0]
    data_type = x[1]
    body_size = (x[2] << 16) | (x[3] << 8) | x[4]
    assert body_size < 1024 * 1024 * 128, 'tag body size too big (> 128MB)'
    timestamp = (x[5] << 16) | (x[6] << 8) | x[7]
    # the extended timestamp byte is the high-order byte
    timestamp += x[8] << 24
    # stream id must be zero
    assert x[9:] == (0, 0, 0)
    body = stream.read(body_size)
    return (data_type, timestamp, body_size, body, previous_tag_size)
def write_tag(stream, tag):
    """Write one FLV tag in the layout read_tag expects."""
    data_type, timestamp, body_size, body, previous_tag_size = tag
    write_uint(stream, previous_tag_size)
    write_byte(stream, data_type)
    # 24-bit body size, most significant byte first
    for shift in (16, 8, 0):
        write_byte(stream, (body_size >> shift) & 0xff)
    # 24-bit timestamp, then the extended high-order byte
    for shift in (16, 8, 0):
        write_byte(stream, (timestamp >> shift) & 0xff)
    write_byte(stream, (timestamp >> 24) & 0xff)
    # stream id is always zero
    stream.write(b'\0\0\0')
    stream.write(body)
def read_flv_header(stream):
    """Consume and validate the fixed 9-byte FLV file header."""
    assert stream.read(3) == b'FLV'
    assert read_byte(stream) == 1   # FLV version
    assert read_byte(stream) == 5   # type flags: audio + video present
    assert read_uint(stream) == 9   # data offset is fixed at 9
def write_flv_header(stream):
    """Write the fixed 9-byte FLV header (version 1, audio + video)."""
    # signature, version=1, flags=5, data offset=9 — all constant
    stream.write(b'FLV\x01\x05\x00\x00\x00\x09')
def read_meta_data(stream):
    """Read a script-data body: the event name AMF value, then the payload."""
    event = read_amf(stream)
    payload = read_amf(stream)
    return event, payload
def read_meta_tag(tag):
    """Validate a metadata tag tuple and parse its AMF body."""
    data_type, timestamp, _body_size, body, previous_tag_size = tag
    assert data_type == TAG_TYPE_METADATA
    assert timestamp == 0
    assert previous_tag_size == 0
    return read_meta_data(BytesIO(body))
#def write_meta_data(stream, meta_type, meta_data):
#    assert isinstance(meta_type, basestring)
#    write_amf(meta_type)
#    write_amf(meta_data)
def write_meta_tag(stream, meta_type, meta_data):
    """Serialize meta_type/meta_data to AMF and emit them as a metadata tag."""
    payload = BytesIO()
    write_amf(payload, meta_type)
    write_amf(payload, meta_data)
    body = payload.getvalue()
    write_tag(stream, (TAG_TYPE_METADATA, 0, len(body), body, 0))
##################################################
# main
##################################################
def guess_output(inputs):
    """Guess an output name from the longest common prefix of the inputs.

    Returns '<common prefix>.flv', or 'output.flv' when the basenames share
    no prefix. Bug fix: the original kept `inputs` as a map() iterator;
    min(map(len, inputs)) exhausted it, so the prefix scan always saw an
    empty sequence and `inputs[0]` raised TypeError under Python 3.
    Materialize the basenames as a list first.
    """
    import os.path
    names = [os.path.basename(p) for p in inputs]
    shortest = min(len(name) for name in names)
    for i in reversed(range(1, shortest)):
        if len(set(name[:i] for name in names)) == 1:
            return names[0][:i] + '.flv'
    return 'output.flv'
def concat_flvs(flvs, output = None):
    """Concatenate FLV part files into a single FLV.

    flvs: non-empty sequence of input file paths.
    output: target path, or a directory (name is guessed), or None (guessed).
    Returns the path of the merged file.
    """
    assert flvs, 'no flv file found'
    import os.path
    if not output:
        output = guess_output(flvs)
    elif os.path.isdir(output):
        output = os.path.join(output, guess_output(flvs))
    print('Merging video parts...')
    # NOTE(review): input and output file objects are never closed here;
    # this relies on interpreter cleanup.
    ins = [open(flv, 'rb') for flv in flvs]
    for stream in ins:
        read_flv_header(stream)
    # the first tag of each part must be its metadata tag
    meta_tags = map(read_tag, ins)
    metas = list(map(read_meta_tag, meta_tags))
    meta_types, metas = zip(*metas)
    assert len(set(meta_types)) == 1
    meta_type = meta_types[0]
    # must merge fields: duration
    # TODO: check other meta info, update other meta info
    total_duration = sum(meta.get('duration') for meta in metas)
    meta_data = metas[0]
    meta_data.set('duration', total_duration)
    out = open(output, 'wb')
    write_flv_header(out)
    write_meta_tag(out, meta_type, meta_data)
    timestamp_start = 0
    for stream in ins:
        while True:
            tag = read_tag(stream)
            if tag:
                data_type, timestamp, body_size, body, previous_tag_size = tag
                # shift this part's timestamps so they continue from the
                # previous part's last timestamp
                timestamp += timestamp_start
                tag = data_type, timestamp, body_size, body, previous_tag_size
                write_tag(out, tag)
            else:
                break
        # next part resumes at the last timestamp written for this part
        timestamp_start = timestamp
    # trailing previous-tag-size for the final tag
    write_uint(out, previous_tag_size)
    return output
def usage():
    """Print command-line usage for merge_flv."""
    print('Usage: [python3] merge_flv.py --output TARGET.flv flv...')
def main():
    """Parse command-line options and merge the given FLV files."""
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="])
    except getopt.GetoptError:
        usage()
        sys.exit(1)
    output = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-o", "--output"):
            output = arg
        else:
            # getopt should never hand back an unknown option, but be safe
            usage()
            sys.exit(1)
    if not args:
        usage()
        sys.exit(1)
    concat_flvs(args, output)
# Allow running this module as a standalone command-line script.
if __name__ == '__main__':
    main()

907
merge_mp4.py Executable file
View File

@ -0,0 +1,907 @@
#!/usr/bin/env python3
# reference: c041828_ISO_IEC_14496-12_2005(E).pdf
##################################################
# reader and writer
##################################################
import struct
from io import BytesIO
def skip(stream, n):
    """Advance the stream position by n bytes without returning any data."""
    position = stream.tell()
    stream.seek(position + n)
def skip_zeros(stream, n):
    """Consume n bytes and assert every one of them is zero."""
    chunk = stream.read(n)
    assert chunk == b'\x00' * n
def read_int(stream):
    """Read a big-endian signed 32-bit integer."""
    (value,) = struct.unpack('>i', stream.read(4))
    return value
def read_uint(stream):
    """Read a big-endian unsigned 32-bit integer."""
    (value,) = struct.unpack('>I', stream.read(4))
    return value
def write_uint(stream, n):
    """Write n as a big-endian unsigned 32-bit integer."""
    stream.write(n.to_bytes(4, 'big'))
def read_ushort(stream):
    """Read a big-endian unsigned 16-bit integer."""
    (value,) = struct.unpack('>H', stream.read(2))
    return value
def read_ulong(stream):
    """Read a big-endian unsigned 64-bit integer."""
    (value,) = struct.unpack('>Q', stream.read(8))
    return value
def read_byte(stream):
    """Read one byte and return it as an int in 0-255."""
    return stream.read(1)[0]
def copy_stream(source, target, n):
    """Copy exactly n bytes from source to target in 1 MiB chunks."""
    chunk_size = 1024 * 1024
    remaining = n
    while remaining > 0:
        step = min(chunk_size, remaining)
        data = source.read(step)
        assert len(data) == step, 'no enough data'
        target.write(data)
        remaining -= step
class Atom:
    """An MP4 box: 32-bit size, 4-byte type code, raw payload bytes.

    `size` always includes the 8-byte header, so for raw atoms
    size == 8 + len(body). Subclasses override `body`'s meaning.
    """
    def __init__(self, type, size, body):
        assert len(type) == 4
        self.type = type    # 4-byte box type code, e.g. b'moov'
        self.size = size    # total box size including the 8-byte header
        self.body = body    # raw payload bytes (subclasses store other shapes)
    def __str__(self):
        #return '<Atom(%s):%s>' % (self.type, repr(self.body))
        return '<Atom(%s):%s>' % (self.type, '')
    def __repr__(self):
        return str(self)
    def write1(self, stream):
        # emit the common 8-byte header: size then type
        write_uint(stream, self.size)
        stream.write(self.type)
    def write(self, stream):
        # raw atoms: header plus cached body; size must match exactly
        assert type(self.body) == bytes, '%s: %s' % (self.type, type(self.body))
        assert self.size == 8 + len(self.body)
        self.write1(stream)
        stream.write(self.body)
    def calsize(self):
        # raw atoms never change size; container subclasses recompute
        return self.size
class CompositeAtom(Atom):
    """A container box whose body is a list of child atoms."""
    def __init__(self, type, size, body):
        assert isinstance(body, list)
        Atom.__init__(self, type, size, body)
    def write(self, stream):
        # header, then each child in order
        assert type(self.body) == list
        self.write1(stream)
        for atom in self.body:
            atom.write(stream)
    def calsize(self):
        # recompute recursively: header plus the sum of child sizes
        self.size = 8 + sum(atom.calsize() for atom in self.body)
        return self.size
    def get1(self, k):
        """Return the first direct child with type code k.

        Bug fix: k is a bytes type code (callers pass b'trak' etc.); the
        original raised TypeError from 'atom not found: ' + k (str + bytes)
        instead of the intended Exception. Format the message instead.
        """
        for atom in self.body:
            if atom.type == k:
                return atom
        raise Exception('atom not found: %s' % k)
    def get(self, *keys):
        """Descend through nested containers following the type codes in keys."""
        atom = self
        for k in keys:
            atom = atom.get1(k)
        return atom
    def get_all(self, k):
        """Return all direct children with type code k."""
        return [a for a in self.body if a.type == k]
class VariableAtom(Atom):
    """An atom cached as raw bytes plus named uint32 fields patched on write.

    variables: list of (name, byte_offset, value) tuples; on write, each
    field's 4 bytes within the cached body are replaced with the current
    value. Offsets are assumed sorted and non-overlapping.
    """
    def __init__(self, type, size, body, variables):
        assert isinstance(body, bytes)
        Atom.__init__(self, type, size, body)
        self.variables = variables
    def write(self, stream):
        self.write1(stream)
        i = 0  # position in the cached body we have copied up to
        n = 0  # bytes written so far, for the final sanity check
        # interleave raw body slices with the patched uint32 field values
        for name, offset, value in self.variables:
            stream.write(self.body[i:offset])
            write_uint(stream, value)
            n += offset - i + 4
            i = offset + 4
        stream.write(self.body[i:])
        n += len(self.body) - i
        # exactly len(body) bytes must follow the header
        assert n == len(self.body)
    def get(self, k):
        # current value of field k
        for v in self.variables:
            if v[0] == k:
                return v[2]
        else:
            raise Exception('field not found: ' + k)
    def set(self, k, v):
        # replace field k's value (tuples are immutable, so rebuild the entry)
        for i in range(len(self.variables)):
            variable = self.variables[i]
            if variable[0] == k:
                self.variables[i] = (k, variable[1], v)
                break
        else:
            raise Exception('field not found: '+k)
def read_raw(stream, size, left, type):
    """Slurp the remaining body bytes and keep them as an opaque Atom."""
    assert size == left + 8
    return Atom(type, size, stream.read(left))
def read_body_stream(stream, left):
    """Read `left` bytes and return (bytes, in-memory stream over them)."""
    data = stream.read(left)
    assert len(data) == left
    return data, BytesIO(data)
def read_full_atom(stream):
    """Read a 'full box' version/flags word; only version 0 is supported."""
    value = read_uint(stream)
    assert value >> 24 == 0  # version byte
    return value
def read_mvhd(stream, size, left, type):
    """Parse the movie header box, exposing 'duration' as a patchable field.

    Duration sits at byte offset 16 of the body (after version/flags,
    creation/modification times and time scale), hence the VariableAtom.
    """
    body, stream = read_body_stream(stream, left)
    value = read_full_atom(stream)
    left -= 4
    # new Date(movieTime * 1000 - 2082850791998L);
    creation_time = read_uint(stream)
    modification_time = read_uint(stream)
    time_scale = read_uint(stream)
    duration = read_uint(stream)
    left -= 16
    qt_preferred_fate = read_uint(stream)  # NOTE(review): presumably "rate"
    qt_preferred_volume = read_ushort(stream)
    assert stream.read(10) == b'\x00' * 10
    # 3x3 transformation matrix
    qt_matrixA = read_uint(stream)
    qt_matrixB = read_uint(stream)
    qt_matrixU = read_uint(stream)
    qt_matrixC = read_uint(stream)
    qt_matrixD = read_uint(stream)
    qt_matrixV = read_uint(stream)
    qt_matrixX = read_uint(stream)
    qt_matrixY = read_uint(stream)
    qt_matrixW = read_uint(stream)
    qt_previewTime = read_uint(stream)
    qt_previewDuration = read_uint(stream)
    qt_posterTime = read_uint(stream)
    qt_selectionTime = read_uint(stream)
    qt_selectionDuration = read_uint(stream)
    qt_currentTime = read_uint(stream)
    nextTrackID = read_uint(stream)
    left -= 80
    assert left == 0
    return VariableAtom(b'mvhd', size, body, [('duration', 16, duration)])
def read_tkhd(stream, size, left, type):
    """Parse the track header box, exposing 'duration' as a patchable field.

    Duration sits at byte offset 20 of the body (after version/flags,
    creation/modification times, track id and 4 reserved bytes).
    """
    body, stream = read_body_stream(stream, left)
    value = read_full_atom(stream)
    left -= 4
    # new Date(movieTime * 1000 - 2082850791998L);
    creation_time = read_uint(stream)
    modification_time = read_uint(stream)
    track_id = read_uint(stream)
    assert stream.read(4) == b'\x00' * 4
    duration = read_uint(stream)
    left -= 20
    assert stream.read(8) == b'\x00' * 8
    qt_layer = read_ushort(stream)
    qt_alternate_group = read_ushort(stream)
    qt_volume = read_ushort(stream)
    assert stream.read(2) == b'\x00\x00'
    # 3x3 transformation matrix
    qt_matrixA = read_uint(stream)
    qt_matrixB = read_uint(stream)
    qt_matrixU = read_uint(stream)
    qt_matrixC = read_uint(stream)
    qt_matrixD = read_uint(stream)
    qt_matrixV = read_uint(stream)
    qt_matrixX = read_uint(stream)
    qt_matrixY = read_uint(stream)
    qt_matrixW = read_uint(stream)
    # track width/height are 16.16 fixed point; keep the integer part
    qt_track_width = read_uint(stream)
    width = qt_track_width >> 16
    qt_track_height = read_uint(stream)
    height = qt_track_height >> 16
    left -= 60
    assert left == 0
    return VariableAtom(b'tkhd', size, body, [('duration', 20, duration)])
def read_mdhd(stream, size, left, type):
    """Parse the media header box, exposing 'duration' as a patchable field.

    Duration sits at byte offset 16 of the body, same layout as mvhd.
    """
    body, stream = read_body_stream(stream, left)
    value = read_full_atom(stream)
    left -= 4
    # new Date(movieTime * 1000 - 2082850791998L);
    creation_time = read_uint(stream)
    modification_time = read_uint(stream)
    time_scale = read_uint(stream)
    duration = read_uint(stream)
    left -= 16
    packed_language = read_ushort(stream)
    qt_quality = read_ushort(stream)
    left -= 4
    assert left == 0
    return VariableAtom(b'mdhd', size, body, [('duration', 16, duration)])
def read_hdlr(stream, size, left, type):
    """Parse the handler box; the body is validated but kept as raw bytes."""
    body, stream = read_body_stream(stream, left)
    value = read_full_atom(stream)
    left -= 4
    qt_component_type = read_uint(stream)
    handler_type = read_uint(stream)
    qt_component_manufacturer = read_uint(stream)
    qt_component_flags = read_uint(stream)
    qt_component_flags_mask = read_uint(stream)
    left -= 20
    # the rest is the NUL-terminated handler name
    track_name = stream.read(left - 1)
    assert stream.read(1) == b'\x00'
    return Atom(b'hdlr', size, body)
def read_vmhd(stream, size, left, type):
    """Parse the video media header box; body is validated, kept raw."""
    body, stream = read_body_stream(stream, left)
    value = read_full_atom(stream)
    left -= 4
    assert left == 8
    graphic_mode = read_ushort(stream)
    op_color_read = read_ushort(stream)  # NOTE(review): presumably "red"
    op_color_green = read_ushort(stream)
    op_color_blue = read_ushort(stream)
    return Atom(b'vmhd', size, body)
def read_stsd(stream, size, left, type):
    """Parse the sample description box: version/flags, an entry count, then
    that many codec-specific child atoms (e.g. avc1, mp4a)."""
    value = read_full_atom(stream)
    left -= 4
    entry_count = read_uint(stream)
    left -= 4
    children = []
    for i in range(entry_count):
        atom = read_atom(stream)
        children.append(atom)
        left -= atom.size
    assert left == 0
    #return Atom('stsd', size, children)
    class stsd_atom(Atom):
        # body is (version/flags word, list of child atoms)
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            write_uint(stream, self.body[0])
            write_uint(stream, len(self.body[1]))
            for atom in self.body[1]:
                atom.write(stream)
        def calsize(self):
            oldsize = self.size # TODO: remove
            self.size = 8 + 4 + 4 + sum([atom.calsize() for atom in self.body[1]])
            assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
            return self.size
    return stsd_atom(b'stsd', size, (value, children))
def read_avc1(stream, size, left, type):
    """Parse an AVC sample entry; fields are validated, body kept raw."""
    body, stream = read_body_stream(stream, left)
    skip_zeros(stream, 6)
    data_reference_index = read_ushort(stream)
    skip_zeros(stream, 2)
    skip_zeros(stream, 2)
    skip_zeros(stream, 12)
    width = read_ushort(stream)
    height = read_ushort(stream)
    # resolutions are 16.16 fixed point; keep the integer part
    horizontal_rez = read_uint(stream) >> 16
    vertical_rez = read_uint(stream) >> 16
    assert stream.read(4) == b'\x00' * 4
    frame_count = read_ushort(stream)
    # compressor name: Pascal-style length byte plus 31 bytes of storage
    string_len = read_byte(stream)
    compressor_name = stream.read(31)
    depth = read_ushort(stream)
    assert stream.read(2) == b'\xff\xff'
    left -= 78
    child = read_atom(stream)
    assert child.type in (b'avcC', b'pasp'), 'if the sub atom is not avcC or pasp (actual %s), you should not cache raw body' % child.type
    left -= child.size
    stream.read(left) # XXX
    return Atom(b'avc1', size, body)
def read_avcC(stream, size, left, type):
    """Discard the AVC decoder configuration; keep only the box shell."""
    stream.read(left)
    return Atom(b'avcC', size, None)
def read_stts(stream, size, left, type):
    """Parse the time-to-sample box as (count, duration) entries.

    Only single-entry tables are supported (constant frame duration),
    which is what the merge step relies on.
    """
    value = read_full_atom(stream)
    left -= 4
    entry_count = read_uint(stream)
    assert entry_count == 1
    left -= 4
    samples = []
    for i in range(entry_count):
        sample_count = read_uint(stream)
        sample_duration = read_uint(stream)
        samples.append((sample_count, sample_duration))
        left -= 8
    assert left == 0
    #return Atom('stts', size, None)
    class stts_atom(Atom):
        # body is (version/flags word, [(sample_count, sample_duration), ...])
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            write_uint(stream, self.body[0])
            write_uint(stream, len(self.body[1]))
            for sample_count, sample_duration in self.body[1]:
                write_uint(stream, sample_count)
                write_uint(stream, sample_duration)
        def calsize(self):
            oldsize = self.size # TODO: remove
            self.size = 8 + 4 + 4 + len(self.body[1]) * 8
            assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
            return self.size
    return stts_atom(b'stts', size, (value, samples))
def read_stss(stream, size, left, type):
    """Parse the sync-sample box: the list of random-access sample numbers."""
    value = read_full_atom(stream)
    left -= 4
    entry_count = read_uint(stream)
    left -= 4
    samples = []
    for i in range(entry_count):
        sample = read_uint(stream)
        samples.append(sample)
        left -= 4
    assert left == 0
    #return Atom('stss', size, None)
    class stss_atom(Atom):
        # body is (version/flags word, [sample_number, ...])
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            write_uint(stream, self.body[0])
            write_uint(stream, len(self.body[1]))
            for sample in self.body[1]:
                write_uint(stream, sample)
        def calsize(self):
            self.size = 8 + 4 + 4 + len(self.body[1]) * 4
            return self.size
    return stss_atom(b'stss', size, (value, samples))
def read_stsc(stream, size, left, type):
    """Parse the sample-to-chunk box as (first_chunk, samples_per_chunk,
    sample_description_index) entries."""
    value = read_full_atom(stream)
    left -= 4
    entry_count = read_uint(stream)
    left -= 4
    chunks = []
    for i in range(entry_count):
        first_chunk = read_uint(stream)
        samples_per_chunk = read_uint(stream)
        sample_description_index = read_uint(stream)
        assert sample_description_index == 1 # what is it?
        chunks.append((first_chunk, samples_per_chunk, sample_description_index))
        left -= 12
    #chunks, samples = zip(*chunks)
    #total = 0
    #for c, s in zip(chunks[1:], samples):
    #	total += c*s
    #print 'total', total
    assert left == 0
    #return Atom('stsc', size, None)
    class stsc_atom(Atom):
        # body is (version/flags word, [(first_chunk, samples, desc_index), ...])
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            write_uint(stream, self.body[0])
            write_uint(stream, len(self.body[1]))
            for first_chunk, samples_per_chunk, sample_description_index in self.body[1]:
                write_uint(stream, first_chunk)
                write_uint(stream, samples_per_chunk)
                write_uint(stream, sample_description_index)
        def calsize(self):
            self.size = 8 + 4 + 4 + len(self.body[1]) * 12
            return self.size
    return stsc_atom(b'stsc', size, (value, chunks))
def read_stsz(stream, size, left, type):
    """Parse the sample-size box; only per-sample size tables are supported
    (uniform sample_size == 0 is asserted, so the entry list is always read)."""
    value = read_full_atom(stream)
    left -= 4
    sample_size = read_uint(stream)
    sample_count = read_uint(stream)
    left -= 8
    assert sample_size == 0
    total = 0
    sizes = []
    if sample_size == 0:
        for i in range(sample_count):
            entry_size = read_uint(stream)
            sizes.append(entry_size)
            total += entry_size
            left -= 4
    assert left == 0
    #return Atom('stsz', size, None)
    class stsz_atom(Atom):
        # body is (version/flags word, sample_size, sample_count, [entry_size, ...])
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            write_uint(stream, self.body[0])
            write_uint(stream, self.body[1])
            write_uint(stream, self.body[2])
            for entry_size in self.body[3]:
                write_uint(stream, entry_size)
        def calsize(self):
            self.size = 8 + 4 + 8 + len(self.body[3]) * 4
            return self.size
    return stsz_atom(b'stsz', size, (value, sample_size, sample_count, sizes))
def read_stco(stream, size, left, type):
    """Parse the chunk-offset box: 32-bit absolute file offsets per chunk."""
    value = read_full_atom(stream)
    left -= 4
    entry_count = read_uint(stream)
    left -= 4
    offsets = []
    for i in range(entry_count):
        chunk_offset = read_uint(stream)
        offsets.append(chunk_offset)
        left -= 4
    assert left == 0
    #return Atom('stco', size, None)
    class stco_atom(Atom):
        # body is (version/flags word, [chunk_offset, ...])
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            write_uint(stream, self.body[0])
            write_uint(stream, len(self.body[1]))
            for chunk_offset in self.body[1]:
                write_uint(stream, chunk_offset)
        def calsize(self):
            self.size = 8 + 4 + 4 + len(self.body[1]) * 4
            return self.size
    return stco_atom(b'stco', size, (value, offsets))
def read_ctts(stream, size, left, type):
    """Parse the composition-time-to-sample box as (count, offset) entries."""
    value = read_full_atom(stream)
    left -= 4
    entry_count = read_uint(stream)
    left -= 4
    samples = []
    for i in range(entry_count):
        sample_count = read_uint(stream)
        sample_offset = read_uint(stream)
        samples.append((sample_count, sample_offset))
        left -= 8
    assert left == 0
    class ctts_atom(Atom):
        # body is (version/flags word, [(sample_count, sample_offset), ...])
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            write_uint(stream, self.body[0])
            write_uint(stream, len(self.body[1]))
            for sample_count, sample_offset in self.body[1]:
                write_uint(stream, sample_count)
                write_uint(stream, sample_offset)
        def calsize(self):
            self.size = 8 + 4 + 4 + len(self.body[1]) * 8
            return self.size
    return ctts_atom(b'ctts', size, (value, samples))
def read_smhd(stream, size, left, type):
    """Parse the sound media header box; body is validated, kept raw."""
    body, stream = read_body_stream(stream, left)
    value = read_full_atom(stream)
    left -= 4
    balance = read_ushort(stream)
    assert stream.read(2) == b'\x00\x00'
    left -= 4
    assert left == 0
    return Atom(b'smhd', size, body)
def read_mp4a(stream, size, left, type):
    """Parse an AAC sample entry (including its esds child); body kept raw."""
    body, stream = read_body_stream(stream, left)
    assert stream.read(6) == b'\x00' * 6
    data_reference_index = read_ushort(stream)
    assert stream.read(8) == b'\x00' * 8
    channel_count = read_ushort(stream)
    sample_size = read_ushort(stream)
    assert stream.read(4) == b'\x00' * 4
    time_scale = read_ushort(stream)
    assert stream.read(2) == b'\x00' * 2
    left -= 28
    # the single child must be the elementary stream descriptor
    atom = read_atom(stream)
    assert atom.type == b'esds'
    left -= atom.size
    assert left == 0
    return Atom(b'mp4a', size, body)
def read_descriptor(stream):
    """MPEG-4 descriptor parsing is not implemented; the tag byte is consumed."""
    read_byte(stream)
    raise NotImplementedError()
def read_esds(stream, size, left, type):
    """Skip the elementary stream descriptor; the payload is not retained."""
    value = read_uint(stream)
    assert value >> 24 == 0  # version
    left -= 4
    stream.read(left)  # descriptor bytes, ignored
    return Atom(b'esds', size, None)
def read_composite_atom(stream, size, left, type):
    """Parse a pure container box by reading children until the body is consumed."""
    children = []
    remaining = left
    while remaining > 0:
        child = read_atom(stream)
        children.append(child)
        remaining -= child.size
    assert remaining == 0, remaining
    return CompositeAtom(type, size, children)
def read_mdat(stream, size, left, type):
    """Skip the media payload, remembering (stream, offset, size) so the
    bytes can be copied straight from the source file at write time.

    The returned atom therefore keeps the source stream alive: it must
    still be open when write()/write2() is called.
    """
    source_start = stream.tell()
    source_size = left
    skip(stream, left)
    #return Atom(type, size, None)
    #raise NotImplementedError()
    class mdat_atom(Atom):
        # body is (source stream, payload start offset, payload size)
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            self.write2(stream)
        def write2(self, stream):
            source, source_start, source_size = self.body
            # NOTE(review): the original position is saved but never restored
            original = source.tell()
            source.seek(source_start)
            copy_stream(source, stream, source_size)
        def calsize(self):
            return self.size
    return mdat_atom(b'mdat', size, (stream, source_start, source_size))
# Dispatch table: box type code -> parser. The comments note what the merge
# step needs from each box.
atom_readers = {
    b'mvhd': read_mvhd, # merge duration
    b'tkhd': read_tkhd, # merge duration
    b'mdhd': read_mdhd, # merge duration
    b'hdlr': read_hdlr, # nothing
    b'vmhd': read_vmhd, # nothing
    b'stsd': read_stsd, # nothing
    b'avc1': read_avc1, # nothing
    b'avcC': read_avcC, # nothing
    b'stts': read_stts, # sample_count, sample_duration
    b'stss': read_stss, # join indexes
    b'stsc': read_stsc, # merge # sample numbers
    b'stsz': read_stsz, # merge # samples
    b'stco': read_stco, # merge # chunk offsets
    b'ctts': read_ctts, # merge
    b'smhd': read_smhd, # nothing
    b'mp4a': read_mp4a, # nothing
    b'esds': read_esds, # nothing
    b'ftyp': read_raw,
    b'yqoo': read_raw,
    b'moov': read_composite_atom,
    b'trak': read_composite_atom,
    b'mdia': read_composite_atom,
    b'minf': read_composite_atom,
    b'dinf': read_composite_atom,
    b'stbl': read_composite_atom,
    b'iods': read_raw,
    b'dref': read_raw,
    b'free': read_raw,
    b'edts': read_raw,
    b'pasp': read_raw,
    b'mdat': read_mdat,
}
#stsd sample descriptions (codec types, initialization etc.)
#stts (decoding) time-to-sample
#ctts (composition) time to sample
#stsc sample-to-chunk, partial data-offset information
#stsz sample sizes (framing)
#stz2 compact sample sizes (framing)
#stco chunk offset, partial data-offset information
#co64 64-bit chunk offset
#stss sync sample table (random access points)
#stsh shadow sync sample table
#padb sample padding bits
#stdp sample degradation priority
#sdtp independent and disposable samples
#sbgp sample-to-group
#sgpd sample group description
#subs sub-sample information
def read_atom(stream):
    """Read one atom: 8-byte header (uint32 size + 4-byte type), then a
    type-specific body via atom_readers.

    Returns None at end of stream. A size of 1 means the real 64-bit size
    follows the header; 'uuid' boxes and unknown types are rejected.
    """
    header = stream.read(8)
    if not header:
        return
    assert len(header) == 8
    n = 0
    size = struct.unpack('>I', header[:4])[0]
    assert size > 0
    n += 4
    type = header[4:8]
    n += 4
    assert type != b'uuid'
    if size == 1:
        # largesize: the actual 64-bit box size follows
        size = read_ulong(stream)
        n += 8
    left = size - n  # body bytes still to be consumed by the reader
    if type in atom_readers:
        return atom_readers[type](stream, size, left, type)
    raise NotImplementedError('%s: %d' % (type, left))
def write_atom(stream, atom):
    """Delegate serialization to the atom's own write method."""
    atom.write(stream)
def parse_atoms(stream):
    """Read atoms until end of stream and return them as a list."""
    atoms = []
    while True:
        atom = read_atom(stream)
        if atom is None:
            break
        atoms.append(atom)
    return atoms
def read_mp4(stream):
    """Parse a whole MP4 file; return (all atoms, the moov atom, the mdat atom).

    Exactly one moov and one mdat are required.
    """
    atoms = parse_atoms(stream)
    moovs = [a for a in atoms if a.type == b'moov']
    mdats = [a for a in atoms if a.type == b'mdat']
    assert len(moovs) == 1
    assert len(mdats) == 1
    return atoms, moovs[0], mdats[0]
##################################################
# merge
##################################################
def merge_stts(samples_list):
    """Merge single-entry stts tables: counts add up, durations must match."""
    entries = []
    for samples in samples_list:
        assert len(samples) == 1
        entries.append(samples[0])
    counts, durations = zip(*entries)
    assert len(set(durations)) == 1, 'not all durations equal'
    return [(sum(counts), durations[0])]
def merge_stss(samples, sample_number_list):
    """Concatenate sync-sample tables, offsetting each part's sample numbers
    by the cumulative sample count of the parts before it."""
    results = []
    base = 0
    for part, count in zip(samples, sample_number_list):
        results.extend(base + s for s in part)
        base += count
    return results
def merge_stsc(chunks_list, total_chunk_number_list):
    """Merge sample-to-chunk tables, renumbering chunk indices globally.

    Each entry's chunk span is inferred from the next entry's first_chunk
    (or from the part's total chunk count for the last entry).
    """
    results = []
    next_chunk = 1
    for chunks, total in zip(chunks_list, total_chunk_number_list):
        for i, (first_chunk, sample_number, description) in enumerate(chunks):
            if i + 1 < len(chunks):
                span = chunks[i + 1][0] - first_chunk
            else:
                # the last entry covers all remaining chunks of this part
                span = total + 1 - first_chunk
            results.append((next_chunk, sample_number, description))
            next_chunk += span
    return results
def merge_stco(offsets_list, mdats):
    """Rebase chunk offsets as if all mdat payloads were laid end to end,
    starting at zero. mdat.body[1] is that part's payload start offset."""
    base = 0
    results = []
    for offsets, mdat in zip(offsets_list, mdats):
        mdat_start = mdat.body[1]
        results.extend(base + off - mdat_start for off in offsets)
        base += mdat.size - 8  # payload length of this part's mdat
    return results
def merge_stsz(sizes_list):
    """Concatenate per-part sample-size tables into one flat list."""
    merged = []
    for sizes in sizes_list:
        merged.extend(sizes)
    return merged
def merge_mdats(mdats):
    """Build a virtual mdat atom that streams every source mdat's payload in turn."""
    total_size = 8 + sum(mdat.size - 8 for mdat in mdats)
    class multi_mdat_atom(Atom):
        def __init__(self, type, size, body):
            Atom.__init__(self, type, size, body)
        def write(self, stream):
            self.write1(stream)
            self.write2(stream)
        def write2(self, stream):
            # copy each source payload in order
            for mdat in self.body:
                mdat.write2(stream)
        def calsize(self):
            return self.size
    return multi_mdat_atom(b'mdat', total_size, mdats)
def merge_moov(moovs, mdats):
    """Merge all parts' moov boxes into the first one, in place, and return it.

    Durations are summed; the stts/stss/stsc/stco/stsz/ctts tables of both
    tracks are concatenated/rebased; finally all chunk offsets are shifted
    by the moov's size change so they still point into the merged mdat.

    Bug fix: the original rebuilt stco bodies as
    `stco_atom.body = stss_atom.body[0], stco...` (copy-paste), pairing the
    chunk-offset table with the *stss* box's version/flags word (in four
    places); each stco atom's own version/flags word is used instead.
    """
    mvhd_duration = 0
    for x in moovs:
        mvhd_duration += x.get(b'mvhd').get('duration')
    tkhd_durations = [0, 0]
    mdhd_durations = [0, 0]
    for x in moovs:
        traks = x.get_all(b'trak')
        assert len(traks) == 2
        tkhd_durations[0] += traks[0].get(b'tkhd').get('duration')
        tkhd_durations[1] += traks[1].get(b'tkhd').get('duration')
        mdhd_durations[0] += traks[0].get(b'mdia', b'mdhd').get('duration')
        mdhd_durations[1] += traks[1].get(b'mdia', b'mdhd').get('duration')
    #mvhd_duration = min(mvhd_duration, tkhd_durations)
    trak0s = [x.get_all(b'trak')[0] for x in moovs]
    trak1s = [x.get_all(b'trak')[1] for x in moovs]
    # merge the per-track sample tables
    stts0 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak0s)
    stts1 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak1s)
    stss = merge_stss((x.get(b'mdia', b'minf', b'stbl', b'stss').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3]) for x in trak0s))
    stsc0 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak0s))
    stsc1 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak1s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak1s))
    stco0 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak0s), mdats)
    stco1 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak1s), mdats)
    stsz0 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3] for x in trak0s))
    stsz1 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3] for x in trak1s))
    ctts = sum((x.get(b'mdia', b'minf', b'stbl', b'ctts').body[1] for x in trak0s), [])
    # write the merged values back into the first moov
    moov = moovs[0]
    moov.get(b'mvhd').set('duration', mvhd_duration)
    trak0 = moov.get_all(b'trak')[0]
    trak1 = moov.get_all(b'trak')[1]
    trak0.get(b'tkhd').set('duration', tkhd_durations[0])
    trak1.get(b'tkhd').set('duration', tkhd_durations[1])
    trak0.get(b'mdia', b'mdhd').set('duration', mdhd_durations[0])
    trak1.get(b'mdia', b'mdhd').set('duration', mdhd_durations[1])
    stts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stts')
    stts_atom.body = stts_atom.body[0], stts0
    stts_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stts')
    stts_atom.body = stts_atom.body[0], stts1
    stss_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stss')
    stss_atom.body = stss_atom.body[0], stss
    stsc_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsc')
    stsc_atom.body = stsc_atom.body[0], stsc0
    stsc_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsc')
    stsc_atom.body = stsc_atom.body[0], stsc1
    stco_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stco')
    stco_atom.body = stco_atom.body[0], stco0
    stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco')
    stco_atom.body = stco_atom.body[0], stco1
    stsz_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsz')
    stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz0), stsz0
    stsz_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsz')
    stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz1), stsz1
    ctts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'ctts')
    ctts_atom.body = ctts_atom.body[0], ctts
    # the rewritten tables change the moov size, which moves the mdat; shift
    # every chunk offset by the growth so they still point at the payload
    old_moov_size = moov.size
    new_moov_size = moov.calsize()
    new_mdat_start = mdats[0].body[1] + new_moov_size - old_moov_size
    stco0 = [x + new_mdat_start for x in stco0]
    stco1 = [x + new_mdat_start for x in stco1]
    stco_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stco')
    stco_atom.body = stco_atom.body[0], stco0
    stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco')
    stco_atom.body = stco_atom.body[0], stco1
    return moov
def merge_mp4s(files, output):
    """Merge MP4 part files into the file at path `output`.

    The input files must stay open until the merged mdat has been written,
    because the merged mdat atom streams its payload from them. Fixes: the
    original never closed the input files or the output file (and shadowed
    the `output` parameter with the output file object); everything is now
    closed deterministically.
    """
    assert files
    ins = [open(mp4, 'rb') for mp4 in files]
    try:
        mp4s = list(map(read_mp4, ins))
        moovs = [x[1] for x in mp4s]
        mdats = [x[2] for x in mp4s]
        moov = merge_moov(moovs, mdats)
        mdat = merge_mdats(mdats)
        with open(output, 'wb') as out:
            # keep the first file's atom order, substituting the merged boxes
            for atom in mp4s[0][0]:
                if atom.type == b'moov':
                    moov.write(out)
                elif atom.type == b'mdat':
                    mdat.write(out)
                else:
                    atom.write(out)
    finally:
        for f in ins:
            f.close()
##################################################
# main
##################################################
# TODO: FIXME: duplicate of merge_flv
def guess_output(inputs):
    """Guess an output name from the longest common prefix of the inputs.

    Returns '<common prefix>.mp4', or 'output.mp4' when the basenames share
    no prefix. Bug fix (same as merge_flv): the original kept `inputs` as a
    map() iterator that min() exhausted, so the prefix scan saw an empty
    sequence and `inputs[0]` raised TypeError under Python 3.
    """
    import os.path
    names = [os.path.basename(p) for p in inputs]
    shortest = min(len(name) for name in names)
    for i in reversed(range(1, shortest)):
        if len(set(name[:i] for name in names)) == 1:
            return names[0][:i] + '.mp4'
    return 'output.mp4'
def concat_mp4s(mp4s, output = None):
    """Merge MP4 parts into one file; guess the output name when not given.

    Returns the output path.
    """
    assert mp4s, 'no mp4 file found'
    import os.path
    target = output
    if not target:
        target = guess_output(mp4s)
    elif os.path.isdir(target):
        # a directory was given: place the guessed name inside it
        target = os.path.join(target, guess_output(mp4s))
    print('Merging video parts...')
    merge_mp4s(mp4s, target)
    return target
def usage():
    """Print command-line usage for merge_mp4."""
    print('Usage: [python3] merge_mp4.py --output TARGET.mp4 mp4...')
def main():
    """Parse command-line options and merge the given MP4 files."""
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="])
    except getopt.GetoptError:
        usage()
        sys.exit(1)
    output = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-o", "--output"):
            output = arg
        else:
            # getopt should never hand back an unknown option, but be safe
            usage()
            sys.exit(1)
    if not args:
        usage()
        sys.exit(1)
    concat_mp4s(args, output)
# Allow running this module as a standalone command-line script.
if __name__ == '__main__':
    main()

6
you-get Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env python3
# Thin launcher script: pull the downloader API from the `get` module and
# dispatch to its generic entry point.
from get import *
if __name__ == '__main__':
    main('you-get', any_download, any_download_playlist)

19
you-get.json Normal file
View File

@ -0,0 +1,19 @@
{
"version": "0.0.1",
"date": "2012-08-20",
"author": "Mort Yao <mort.yao@gmail.com>",
"file_list": [
"LICENSE",
"README.md",
"common.py",
"get.py",
"get_tudou.py",
"get_yinyuetai.py",
"get_youku.py",
"get_youtube.py",
"merge_flv.py",
"merge_mp4.py",
"you-get",
"you-get.json"
]
}