From 05e7b98e2f20e9a7add6b9eb93c456853fae5980 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 18 Sep 2012 22:23:10 +0200 Subject: [PATCH] use FFmpeg for converting and joining --- .gitignore | 2 ++ you_get/common.py | 60 +++++++++++++++++++++++++++-------- you_get/downloader/cntv.py | 2 +- you_get/processor/__init__.py | 4 +-- you_get/processor/ffmpeg.py | 60 +++++++++++++++++++++++++++++++++++ you_get/processor/join_ts.py | 59 ---------------------------------- 6 files changed, 111 insertions(+), 76 deletions(-) create mode 100644 you_get/processor/ffmpeg.py delete mode 100755 you_get/processor/join_ts.py diff --git a/.gitignore b/.gitignore index d5ea8c08..f0cef17f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ _*/ *.cmt.* *.3gp *.flv +*.mkv *.mp4 +*.mpg *.ts *.webm diff --git a/you_get/common.py b/you_get/common.py index 40dd4545..6cc317f8 100644 --- a/you_get/common.py +++ b/you_get/common.py @@ -352,7 +352,7 @@ class DummyProgressBar: def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False): assert urls if dry_run: - print('Real URLs:\n', urls) + print('Real URLs:\n', urls, '\n') return assert ext in ('3gp', 'flv', 'mp4', 'webm') @@ -392,6 +392,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, bar.update_piece(i + 1) url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker) bar.done() + if not merge: print() return @@ -401,10 +402,20 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, for part in parts: os.remove(part) elif ext == 'mp4': - from .processor.join_mp4 import concat_mp4 - concat_mp4(parts, os.path.join(output_dir, title + '.mp4')) - for part in parts: - os.remove(part) + try: + from .processor.join_mp4 import concat_mp4 + concat_mp4(parts, os.path.join(output_dir, title + '.mp4')) + for part in parts: + os.remove(part) + except: + from .processor.ffmpeg import has_ffmpeg_installed + if has_ffmpeg_installed(): + from .processor.ffmpeg import ffmpeg_concat_mp4_to_mpg + ffmpeg_concat_mp4_to_mpg(parts, os.path.join(output_dir, title + '.mp4')) + for part in parts: + os.remove(part) + else: + print('No ffmpeg is found. Merging aborted.') else: print("Can't merge %s files" % ext) @@ -413,16 +424,16 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False): assert urls if dry_run: - print('Real URLs:\n', urls) + print('Real URLs:\n', urls, '\n') return assert ext in ('ts') title = escape_file_path(title) - filename = '%s.%s' % (title, ext) + filename = '%s.%s' % (title, 'ts') filepath = os.path.join(output_dir, filename) if total_size: - if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9: - print('Skipping %s: file already exists' % tr(filepath)) + if not force and os.path.exists(filepath[:-3] + '.mkv'): + print('Skipping %s: file already exists' % tr(filepath[:-3] + '.mkv')) print() return bar = SimpleProgressBar(total_size, len(urls)) @@ -430,10 +441,28 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer bar = PiecesProgressBar(total_size, len(urls)) if len(urls) == 1: + parts = [] url = urls[0] print('Downloading %s ...' % tr(filename)) + filepath = os.path.join(output_dir, filename) + parts.append(filepath) url_save_chunked(url, filepath, bar, refer = refer, faker = faker) bar.done() + + if not merge: + print() + return + if ext == 'ts': + from .processor.ffmpeg import has_ffmpeg_installed + if has_ffmpeg_installed(): + from .processor.ffmpeg import ffmpeg_convert_ts_to_mkv + ffmpeg_convert_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')) + for part in parts: + os.remove(part) + else: + print('No ffmpeg is found. Conversion aborted.') + else: + print("Can't convert %s files" % ext) else: parts = [] print('Downloading %s.%s ...' % (tr(title), ext)) @@ -445,14 +474,19 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer bar.update_piece(i + 1) url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker) bar.done() + if not merge: print() return if ext == 'ts': - from .processor.join_ts import concat_ts - concat_ts(parts, os.path.join(output_dir, title + '.ts')) - for part in parts: - os.remove(part) + from .processor.ffmpeg import has_ffmpeg_installed + if has_ffmpeg_installed(): + from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv + ffmpeg_concat_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')) + for part in parts: + os.remove(part) + else: + print('No ffmpeg is found. Merging aborted.') else: print("Can't merge %s files" % ext) diff --git a/you_get/downloader/cntv.py b/you_get/downloader/cntv.py index 20974bc8..d807ec27 100644 --- a/you_get/downloader/cntv.py +++ b/you_get/downloader/cntv.py @@ -25,7 +25,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o print_info(site_info, title, ext, size) if not info_only: - download_urls(urls, title, ext, size, output_dir = output_dir, merge = False) + download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) def cntv_download(url, output_dir = '.', merge = True, info_only = False): if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/classpage/video/)?\d+/\d+\.shtml', url): diff --git a/you_get/processor/__init__.py b/you_get/processor/__init__.py index 92294adc..88616f31 100644 --- a/you_get/processor/__init__.py +++ b/you_get/processor/__init__.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -__all__ = ['concat_flv', 'concat_mp4', 'concat_ts'] - from .join_flv import concat_flv from .join_mp4 import concat_mp4 -from .join_ts import concat_ts +from .ffmpeg import * diff --git a/you_get/processor/ffmpeg.py b/you_get/processor/ffmpeg.py new file mode 100644 index 00000000..41ec56a6 --- /dev/null +++ b/you_get/processor/ffmpeg.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +import os.path +import subprocess + +def has_ffmpeg_installed(): + try: + subprocess.call(['ffmpeg', '-loglevel', '0']) + return True + except: + return False + +def ffmpeg_convert_ts_to_mkv(files, output = 'output.mkv'): + for file in files: + if os.path.isfile(file): + params = ['ffmpeg', '-i'] + params.append(file) + params.append(output) + subprocess.call(params) + + return + +def ffmpeg_concat_mp4_to_mpg(files, output = 'output.mpg'): + for file in files: + if os.path.isfile(file): + params = ['ffmpeg', '-i'] + params.append(file) + params.append(file + '.mpg') + subprocess.call(params) + + inputs = [open(file + '.mpg', 'rb') for file in files] + with open(output + '.mpg', 'wb') as o: + for input in inputs: + o.write(input.read()) + + params = ['ffmpeg', '-i'] + params.append(output + '.mpg') + params += ['-vcodec', 'copy', '-acodec', 'copy'] + params.append(output) + subprocess.call(params) + + for file in files: + os.remove(file + '.mpg') + os.remove(output + '.mpg') + + return + +def ffmpeg_concat_ts_to_mkv(files, output = 'output.mkv'): + params = ['ffmpeg', '-isync', '-i'] + params.append('concat:') + for file in files: + if os.path.isfile(file): + params[-1] += file + '|' + params += ['-f', 'matroska', '-c', 'copy', output] + + try: + subprocess.call(params) + return True + except: + return False diff --git a/you_get/processor/join_ts.py b/you_get/processor/join_ts.py deleted file mode 100755 index f99700fb..00000000 --- a/you_get/processor/join_ts.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python - -################################################## -# main -################################################## - -def guess_output(inputs): - import os.path - inputs = map(os.path.basename, inputs) - n = min(map(len, inputs)) - for i in reversed(range(1, n)): - if len(set(s[:i] for s in inputs)) == 1: - return inputs[0][:i] + '.ts' - return 'output.ts' - -def concat_ts(tss, output = None): - assert tss, 'no ts file found' - import os.path - if not output: - output = guess_output(tss) - elif os.path.isdir(output): - output = os.path.join(output, guess_output(tss)) - - print('Merging video parts...') - ins = [open(ts, 'rb') for ts in tss] - with open(output, 'wb') as output: - for i in ins: - output.write(i.read()) - - return output - -def usage(): - print('Usage: [python3] join_ts.py --output TARGET.ts ts...') - -def main(): - import sys, getopt - try: - opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="]) - except getopt.GetoptError as err: - usage() - sys.exit(1) - output = None - for o, a in opts: - if o in ("-h", "--help"): - usage() - sys.exit() - elif o in ("-o", "--output"): - output = a - else: - usage() - sys.exit(1) - if not args: - usage() - sys.exit(1) - - concat_ts(args, output) - -if __name__ == '__main__': - main()