From b2d97bcaea247e1aabfb2ced7d326866c127c6fb Mon Sep 17 00:00:00 2001
From: Sergey Zmushko
Date: Sun, 19 Nov 2017 21:07:51 +0300
Subject: [PATCH] [coub] add coub.com support

Video and audio files with high quality are downloaded.
We use ffmpeg for audio and video combining into one file
---
Reviewer notes (free text between the "---" marker and the diffstat is
ignored by git am):

 * This patch was rebuilt from a copy whose newlines and indentation had
   been collapsed. Indentation of added Python lines follows the code's
   syntax. Whitespace of *context* lines (SITES alignment in common.py,
   nesting in ffmpeg.py) is inferred and unverified; if a hunk is
   rejected, retry with "git apply --ignore-whitespace".
 * The blob ids on the "index" lines are the original ones and no longer
   describe the corrected content of coub.py / ffmpeg.py.
 * coub.py, coub_download: when the video is longer than the audio, the
   audio is looped and the *video* is now passed as the single input
   (previously the audio path was passed twice and the video was lost).
 * coub.py, write_loop_file: the concat list is opened with 'w' so that a
   list left behind by a failed run is not appended to.
 * coub.py, get_coub_data: the pattern had been reduced to an empty
   string, which has no capture group for r1() to return. The
   coubPageCoubJson script-tag pattern is restored here; confirm it
   against the current page markup.
 * ffmpeg.py, ffmpeg_concat_audio_and_video: has_ffmpeg_installed is now
   called; the bare function object was always truthy, so the
   "No ffmpeg found" error could never be raised.
 * Known limitation, left unchanged: get_usable_ffmpeg() reports
   'ffprobe' as the probe tool even when the detected converter is
   avconv.

 src/you_get/common.py              |  1 +
 src/you_get/extractors/__init__.py |  1 +
 src/you_get/extractors/coub.py     | 91 ++++++++++++++++++++++++++++++
 src/you_get/processor/ffmpeg.py    | 31 +++++++++-
 4 files changed, 122 insertions(+), 2 deletions(-)
 create mode 100644 src/you_get/extractors/coub.py

diff --git a/src/you_get/common.py b/src/you_get/common.py
index fe8fbbd0..ce7ee11e 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -13,6 +13,7 @@ SITES = {
     'cctv'             : 'cntv',
     'cntv'             : 'cntv',
     'cbs'              : 'cbs',
+    'coub'             : 'coub',
     'dailymotion'      : 'dailymotion',
     'dilidili'         : 'dilidili',
     'douban'           : 'douban',
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index b078db08..f1acf000 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -11,6 +11,7 @@ from .bokecc import *
 from .cbs import *
 from .ckplayer import *
 from .cntv import *
+from .coub import *
 from .dailymotion import *
 from .dilidili import *
 from .douban import *
diff --git a/src/you_get/extractors/coub.py b/src/you_get/extractors/coub.py
new file mode 100644
index 00000000..7f5dccb6
--- /dev/null
+++ b/src/you_get/extractors/coub.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+__all__ = ['coub_download']
+
+from ..common import *
+from ..processor import ffmpeg
+from ..util.fs import legitimize
+
+
+def coub_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    html = get_content(url)
+
+    try:
+        json_data = get_coub_data(html)
+        title, video_url, audio_url = get_title_and_urls(json_data)
+        video_file_name, video_file_path = get_file_path(merge, output_dir, title, video_url)
+        audio_file_name, audio_file_path = get_file_path(merge, output_dir, title, audio_url)
+        download_url(audio_url, merge, output_dir, title, info_only)
+        download_url(video_url, merge, output_dir, title, info_only)
+        if not info_only:
+            try:
+                fix_coub_video_file(video_file_path)
+                audio_duration = float(ffmpeg.ffprobe_get_media_duration(audio_file_path))
+                video_duration = float(ffmpeg.ffprobe_get_media_duration(video_file_path))
+                loop_file_path = get_loop_file_path(title, output_dir)
+                single_file_path = audio_file_path
+                if audio_duration > video_duration:
+                    write_loop_file(int(audio_duration / video_duration), loop_file_path, video_file_name)
+                else:
+                    single_file_path = video_file_path
+                    write_loop_file(int(video_duration / audio_duration), loop_file_path, audio_file_name)
+
+                ffmpeg.ffmpeg_concat_audio_and_video([loop_file_path, single_file_path], title + "_full", "mp4")
+                cleanup_files([video_file_path, audio_file_path, loop_file_path])
+            except EnvironmentError as err:
+                print("Error preparing full coub video. {}".format(err))
+    except Exception as err:
+        print("Error while downloading files. {}".format(err))
+
+
+def write_loop_file(records_number, loop_file_path, file_name):
+    with open(loop_file_path, 'w') as file:
+        for i in range(records_number):
+            file.write("file '{}'\n".format(file_name))
+
+
+def download_url(url, merge, output_dir, title, info_only):
+    mime, ext, size = url_info(url)
+    print_info(site_info, title, mime, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir, merge=merge)
+
+
+def fix_coub_video_file(file_path):
+    with open(file_path, 'r+b') as file:
+        file.seek(0)
+        file.write(bytes(2))
+
+
+def get_title_and_urls(json_data):
+    title = legitimize(json_data['title'].replace(" ", "_"))
+    video_url = json_data['file_versions']['html5']['video']['high']['url']
+    audio_url = json_data['file_versions']['html5']['audio']['high']['url']
+    return title, video_url, audio_url
+
+
+def get_coub_data(html):
+    coub_data = r1(r'<script id=\'coubPageCoubJson\' type=\'text/json\'>([\w\W]+?(?=</script>))', html)
+    json_data = json.loads(coub_data)
+    return json_data
+
+
+def get_file_path(merge, output_dir, title, url):
+    mime, ext, size = url_info(url)
+    file_name = get_output_filename([], title, ext, output_dir, merge)
+    file_path = os.path.join(output_dir, file_name)
+    return file_name, file_path
+
+
+def get_loop_file_path(title, output_dir):
+    return os.path.join(output_dir, get_output_filename([], title, "txt", None, False))
+
+
+def cleanup_files(files):
+    for file in files:
+        os.remove(file)
+
+
+site_info = "coub.com"
+download = coub_download
+download_playlist = playlist_not_supported('coub')
diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
index c7b362e1..89d53e50 100755
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -28,11 +28,11 @@ def get_usable_ffmpeg(cmd):
             print('It seems that your ffmpeg is a nightly build.')
             print('Please switch to the latest stable if merging failed.')
             version = [1, 0]
-        return cmd, version
+        return cmd, 'ffprobe', version
     except:
         return None
 
-FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
+FFMPEG, FFPROBE, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None, None)
 if logging.getLogger().isEnabledFor(logging.DEBUG):
     LOGLEVEL = ['-loglevel', 'info']
     STDIN = None
@@ -250,3 +250,30 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.', stream=
             pass
 
     return True
+
+
+def ffmpeg_concat_audio_and_video(files, output, ext):
+    print('Merging video and audio parts... ', end="", flush=True)
+    if has_ffmpeg_installed():
+        params = [FFMPEG] + LOGLEVEL
+        params.extend(['-f', 'concat'])
+        for file in files:
+            if os.path.isfile(file):
+                params.extend(['-i', file])
+        params.extend(['-c:v', 'copy'])
+        params.extend(['-c:a', 'aac'])
+        params.extend(['-strict', 'experimental'])
+        params.append(output+"."+ext)
+        return subprocess.call(params, stdin=STDIN)
+    else:
+        raise EnvironmentError('No ffmpeg found')
+
+
+def ffprobe_get_media_duration(file):
+    print('Getting {} duration'.format(file))
+    params = [FFPROBE]
+    params.extend(['-i', file])
+    params.extend(['-show_entries', 'format=duration'])
+    params.extend(['-v', 'quiet'])
+    params.extend(['-of', 'csv=p=0'])
+    return subprocess.check_output(params, stdin=STDIN, stderr=subprocess.STDOUT).decode().strip()