From 44e60c3e2193d3198899f211a8b7c9767b0b6d5e Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Sat, 10 Dec 2016 12:23:35 +0100 Subject: [PATCH 01/37] Initial support for yizhibo.com --- src/you_get/common.py | 1 + src/you_get/extractors/yizhibo.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 src/you_get/extractors/yizhibo.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 7db4fba2..fd727cf4 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -91,6 +91,7 @@ SITES = { 'xiaojiadianvideo' : 'fc2video', 'yinyuetai' : 'yinyuetai', 'miaopai' : 'yixia', + 'yizhibo' : 'yizhibo', 'youku' : 'youku', 'youtu' : 'youtube', 'youtube' : 'youtube', diff --git a/src/you_get/extractors/yizhibo.py b/src/you_get/extractors/yizhibo.py new file mode 100644 index 00000000..f524a0a8 --- /dev/null +++ b/src/you_get/extractors/yizhibo.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +__all__ = ['yizhibo_download'] + +from ..common import * +import json +import time + +def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + video_id = url[url.rfind('/')+1:].split(".")[0] + json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id) + content = get_html(json_request_url) + error = json.loads(content)['result'] + if (error != 1): + raise ValueError("Error : {}".format(error)) + + data = json.loads(content)#['data'] + title = data.get('data')['live_title'] + if (title == ''): + title = data.get('data')['nickname'] + real_url = data.get('data')['play_url'] + + print_info(site_info, title, 'flv', float('inf')) + if not info_only: + download_url_ffmpeg(real_url, title, 'flv', None, output_dir, merge = merge) + +site_info = "yizhibo.com" +download = yizhibo_download +download_playlist = playlist_not_supported('yizhibo') From 0f33e471ad65c2c2dfb0a1e4480cb39d1f2430a2 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Sat, 10 Dec 2016 12:26:06 +0100 Subject: 
[PATCH 02/37] minor correction --- src/you_get/extractors/yizhibo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/yizhibo.py b/src/you_get/extractors/yizhibo.py index f524a0a8..0744e1f9 100644 --- a/src/you_get/extractors/yizhibo.py +++ b/src/you_get/extractors/yizhibo.py @@ -14,7 +14,7 @@ def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **k if (error != 1): raise ValueError("Error : {}".format(error)) - data = json.loads(content)#['data'] + data = json.loads(content) title = data.get('data')['live_title'] if (title == ''): title = data.get('data')['nickname'] From 0f1d5beb1494ca6b64b90e3d8d5949de29b2c31b Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Sun, 11 Dec 2016 01:46:23 +0100 Subject: [PATCH 03/37] Changed the plugin to use download_urls instead of ffmpeg --- src/you_get/extractors/yizhibo.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/yizhibo.py b/src/you_get/extractors/yizhibo.py index 0744e1f9..37fa043c 100644 --- a/src/you_get/extractors/yizhibo.py +++ b/src/you_get/extractors/yizhibo.py @@ -9,7 +9,7 @@ import time def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): video_id = url[url.rfind('/')+1:].split(".")[0] json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id) - content = get_html(json_request_url) + content = get_content(json_request_url) error = json.loads(content)['result'] if (error != 1): raise ValueError("Error : {}".format(error)) @@ -18,11 +18,17 @@ def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **k title = data.get('data')['live_title'] if (title == ''): title = data.get('data')['nickname'] - real_url = data.get('data')['play_url'] - - print_info(site_info, title, 'flv', float('inf')) + m3u8_url = data.get('data')['play_url'] + m3u8 = get_content(m3u8_url) + base_url = 
"/".join(data.get('data')['play_url'].split("/")[:7])+"/" + part_url = re.findall(r'([0-9]+\.ts)', m3u8) + real_url = [] + for i in part_url: + url = base_url + i + real_url.append(url) + print_info(site_info, title, 'ts', float('inf')) if not info_only: - download_url_ffmpeg(real_url, title, 'flv', None, output_dir, merge = merge) + download_urls(real_url, title, 'ts', float('inf'), output_dir, merge = merge) site_info = "yizhibo.com" download = yizhibo_download From e0554b2d7b7a214c988100ac32187208b22e1d26 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Sun, 11 Dec 2016 01:49:13 +0100 Subject: [PATCH 04/37] Made player use the m3u8 file. --- src/you_get/extractors/yizhibo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/yizhibo.py b/src/you_get/extractors/yizhibo.py index 37fa043c..11ce86ad 100644 --- a/src/you_get/extractors/yizhibo.py +++ b/src/you_get/extractors/yizhibo.py @@ -28,6 +28,8 @@ def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **k real_url.append(url) print_info(site_info, title, 'ts', float('inf')) if not info_only: + if player: + launch_player(player, [m3u8_url]) download_urls(real_url, title, 'ts', float('inf'), output_dir, merge = merge) site_info = "yizhibo.com" From a520eb051e797b70eddfecaf5c934259c071bf3c Mon Sep 17 00:00:00 2001 From: AlanYang Date: Thu, 19 Jan 2017 11:15:42 +0800 Subject: [PATCH 05/37] fixed mgtv.com 1.17 change api address and stream domain --- src/you_get/extractors/mgtv.py | 9 +++++---- src/you_get/json_output.py | 5 +++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/mgtv.py b/src/you_get/extractors/mgtv.py index 3ce62efe..1656ac3c 100644 --- a/src/you_get/extractors/mgtv.py +++ b/src/you_get/extractors/mgtv.py @@ -21,7 +21,7 @@ class MGTV(VideoExtractor): id_dic = {i['video_profile']:(i['id']) for i in stream_types} - api_endpoint = 'http://v.api.mgtv.com/player/video?video_id={video_id}' + api_endpoint = 
'http://pcweb.api.mgtv.com/player/video?video_id={video_id}' @staticmethod def get_vid_from_url(url): @@ -63,6 +63,7 @@ class MGTV(VideoExtractor): content = get_content(self.api_endpoint.format(video_id = self.vid)) content = loads(content) self.title = content['data']['info']['title'] + domain = content['data']['stream_domain'][0] #stream_avalable = [i['name'] for i in content['data']['stream']] stream_available = {} @@ -73,7 +74,7 @@ class MGTV(VideoExtractor): if s['video_profile'] in stream_available.keys(): quality_id = self.id_dic[s['video_profile']] url = stream_available[s['video_profile']] - url = re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum + url = domain + re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url) stream_fileid_list = [] @@ -144,8 +145,8 @@ class MGTV(VideoExtractor): else: download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'], output_dir=kwargs['output_dir'], - merge=kwargs['merge'], - av=stream_id in self.dash_streams) + merge=kwargs.get('merge', True)) + # av=stream_id in self.dash_streams) site = MGTV() download = site.download_by_url diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py index 86a42abc..3e1bac9f 100644 --- a/src/you_get/json_output.py +++ b/src/you_get/json_output.py @@ -31,6 +31,11 @@ def print_info(site_info=None, title=None, type=None, size=None): def download_urls(urls=None, title=None, ext=None, total_size=None, refer=None): ve = last_info + if not ve: + ve = VideoExtractor() + ve.name = '' + ve.url = urls + ve.title=title # save download info in streams stream = {} stream['container'] = ext From 61225b1552df86dbecf1be22c6b5433cd3412f44 Mon Sep 17 00:00:00 2001 From: Chuntao Hong Date: Tue, 24 Jan 2017 12:36:57 +0800 Subject: [PATCH 06/37] fix non-ascii url --- src/you_get/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index bea6e62c..51b81cad 
100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -259,6 +259,7 @@ def undeflate(data): # DEPRECATED in favor of get_content() def get_response(url, faker = False): + url = parse.quote(url,':/') # install cookies if cookies: opener = request.build_opener(request.HTTPCookieProcessor(cookies)) From 4d0dac29681a18520dabe1fc6a6deb81fe20f49d Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Thu, 2 Feb 2017 03:59:44 -0500 Subject: [PATCH 07/37] [ffmpeg] call ffmpeg with stdin redirected to the null device Prevent FFmpeg from consuming stdin and interpreting the character stream as a stream of interactive commands, specifically: ? show this help + increase verbosity - decrease verbosity c Send command to first matching filter supporting it C Send/Queue command to all matching filters D cycle through available debug modes h dump packets/hex press to cycle through the 3 states q quit s Show QP histogram This prevents a misclicked key or key sequence (e.g., h) from producing a large amount of debugging output which may confuse the unseasoned user. It is also useful in a batch environment where an unsuspecting user may not realize you-get could consume stdin through FFmpeg, e.g.
while read url; do you-get $url; done '0') or (vers[0] == 'avconv') @@ -24,8 +33,10 @@ def get_usable_ffmpeg(cmd): FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None) if logging.getLogger().isEnabledFor(logging.DEBUG): LOGLEVEL = ['-loglevel', 'info'] + STDIN = None else: LOGLEVEL = ['-loglevel', 'quiet'] + STDIN = DEVNULL def has_ffmpeg_installed(): return FFMPEG is not None @@ -54,14 +65,14 @@ def ffmpeg_concat_av(files, output, ext): params.extend(['-c:a', 'vorbis']) params.extend(['-strict', 'experimental']) params.append(output) - return subprocess.call(params) + return subprocess.call(params, stdin=STDIN) def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'): for file in files: if os.path.isfile(file): params = [FFMPEG] + LOGLEVEL params.extend(['-y', '-i', file, output]) - subprocess.call(params) + subprocess.call(params, stdin=STDIN) return @@ -71,7 +82,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): concat_list = generate_concat_list(files, output) params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', '-i', concat_list, '-c', 'copy', output] - if subprocess.call(params) == 0: + if subprocess.call(params, stdin=STDIN) == 0: os.remove(output + '.txt') return True else: @@ -81,7 +92,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): if os.path.isfile(file): params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.extend([file, file + '.mpg']) - subprocess.call(params) + subprocess.call(params, stdin=STDIN) inputs = [open(file + '.mpg', 'rb') for file in files] with open(output + '.mpg', 'wb') as o: @@ -92,9 +103,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): params.append(output + '.mpg') params += ['-vcodec', 'copy', '-acodec', 'copy'] params.append(output) - subprocess.call(params) - if subprocess.call(params) == 0: + if subprocess.call(params, stdin=STDIN) == 0: for file in files: os.remove(file + '.mpg') os.remove(output + '.mpg') @@ -112,7 +122,7 @@ def 
ffmpeg_concat_ts_to_mkv(files, output='output.mkv'): params += ['-f', 'matroska', '-c', 'copy', output] try: - if subprocess.call(params) == 0: + if subprocess.call(params, stdin=STDIN) == 0: return True else: return False @@ -127,7 +137,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', '-i', concat_list, '-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] - subprocess.check_call(params) + subprocess.check_call(params, stdin=STDIN) os.remove(output + '.txt') return True @@ -138,7 +148,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): params += ['-map', '0', '-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb'] params.append(file + '.ts') - subprocess.call(params) + subprocess.call(params, stdin=STDIN) params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append('concat:') @@ -151,7 +161,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): else: params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output] - if subprocess.call(params) == 0: + if subprocess.call(params, stdin=STDIN) == 0: for file in files: os.remove(file + '.ts') return True @@ -166,7 +176,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', '-i', concat_list, '-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] - subprocess.check_call(params) + subprocess.check_call(params, stdin=STDIN) os.remove(output + '.txt') return True @@ -177,7 +187,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb'] params.append(file + '.ts') - subprocess.call(params) + subprocess.call(params, stdin=STDIN) params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append('concat:') @@ -190,7 +200,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): else: params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output] - subprocess.check_call(params) + 
subprocess.check_call(params, stdin=STDIN) for file in files: os.remove(file + '.ts') return True From 8799197befd1f52278a4344fc41ba94cc45c548a Mon Sep 17 00:00:00 2001 From: YK Liu Date: Mon, 20 Feb 2017 15:09:38 +0800 Subject: [PATCH 08/37] Print audiolang in json output --- src/you_get/json_output.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py index 3e1bac9f..0e610a41 100644 --- a/src/you_get/json_output.py +++ b/src/you_get/json_output.py @@ -11,6 +11,11 @@ def output(video_extractor, pretty_print=True): out['title'] = ve.title out['site'] = ve.name out['streams'] = ve.streams + try: + if ve.audiolang: + out['audiolang'] = ve.audiolang + except NameError: + pass if pretty_print: print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)) else: From 9b9d80b32deb6bae475d3d85f376e6d69c6c0835 Mon Sep 17 00:00:00 2001 From: MaxwellGoblin Date: Sat, 25 Feb 2017 02:31:07 +0800 Subject: [PATCH 09/37] do not print size when the container is m3u8 --- src/you_get/extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 332440dd..af7cc824 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -98,7 +98,7 @@ class VideoExtractor(): if 'quality' in stream: print(" quality: %s" % stream['quality']) - if 'size' in stream: + if 'size' in stream and stream['container'].lower() != 'm3u8': print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size'])) if 'itag' in stream: From 251a1bff489d2eb34bfa52b54b55dbab6069bd63 Mon Sep 17 00:00:00 2001 From: MaxwellGoblin Date: Fri, 24 Feb 2017 22:54:59 +0800 Subject: [PATCH 10/37] ckplayer.py: fix when got data without a ckplayer->info and clean the code --- src/you_get/extractors/ckplayer.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/you_get/extractors/ckplayer.py 
b/src/you_get/extractors/ckplayer.py index 09e95557..91159897 100644 --- a/src/you_get/extractors/ckplayer.py +++ b/src/you_get/extractors/ckplayer.py @@ -9,7 +9,6 @@ __all__ = ['ckplayer_download'] from xml.etree import cElementTree as ET from copy import copy from ..common import * - #---------------------------------------------------------------------- def ckplayer_get_info_by_xml(ckinfo): """str->dict @@ -20,20 +19,22 @@ def ckplayer_get_info_by_xml(ckinfo): 'links': [], 'size': 0, 'flashvars': '',} - if '_text' in dictify(e)['ckplayer']['info'][0]['title'][0]: #title - video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip() + dictified = dictify(e)['ckplayer'] + if 'info' in dictified: + if '_text' in dictified['info'][0]['title'][0]: #title + video_dict['title'] = dictified['info'][0]['title'][0]['_text'].strip() #if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip(): #duration #video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip() - if '_text' in dictify(e)['ckplayer']['video'][0]['size'][0]: #size exists for 1 piece - video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictify(e)['ckplayer']['video']]) + if '_text' in dictified['video'][0]['size'][0]: #size exists for 1 piece + video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictified['video']]) - if '_text' in dictify(e)['ckplayer']['video'][0]['file'][0]: #link exist - video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictify(e)['ckplayer']['video']] + if '_text' in dictified['video'][0]['file'][0]: #link exist + video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictified['video']] - if '_text' in dictify(e)['ckplayer']['flashvars'][0]: - video_dict['flashvars'] = dictify(e)['ckplayer']['flashvars'][0]['_text'].strip() + if '_text' in dictified['flashvars'][0]: + video_dict['flashvars'] = dictified['flashvars'][0]['_text'].strip() return video_dict From 
925415fa2b831c6fb5856de0e3739c31c101c1a9 Mon Sep 17 00:00:00 2001 From: MaxwellGoblin Date: Sat, 25 Feb 2017 00:31:30 +0800 Subject: [PATCH 11/37] add support for dilidili.mobi and dilidili.wang --- src/you_get/extractors/dilidili.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/dilidili.py b/src/you_get/extractors/dilidili.py index 082f84e1..f7b5922d 100644 --- a/src/you_get/extractors/dilidili.py +++ b/src/you_get/extractors/dilidili.py @@ -21,8 +21,9 @@ headers = { #---------------------------------------------------------------------- def dilidili_parser_data_to_stream_types(typ ,vid ,hd2 ,sign, tmsign, ulk): """->list""" + another_url = 'https://newplayer.jfrft.com/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign, tmsign = tmsign, ulk = ulk) parse_url = 'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign, tmsign = tmsign, ulk = ulk) - html = get_content(parse_url, headers=headers) + html = get_content(another_url, headers=headers) info = re.search(r'(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})', html).groups() info = [i.strip('{}').split('->') for i in info] @@ -35,13 +36,22 @@ def dilidili_parser_data_to_stream_types(typ ,vid ,hd2 ,sign, tmsign, ulk): #---------------------------------------------------------------------- def dilidili_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): - if re.match(r'http://www.dilidili.com/watch\S+', url): + global headers + re_str = r'http://www.dilidili.com/watch\S+' + if re.match(r'http://www.dilidili.wang', url): + re_str = r'http://www.dilidili.wang/watch\S+' + headers['Referer'] = 'http://www.dilidili.wang/' + elif re.match(r'http://www.dilidili.mobi', url): + re_str = 
r'http://www.dilidili.mobi/watch\S+' + headers['Referer'] = 'http://www.dilidili.mobi/' + + if re.match(re_str, url): html = get_content(url) title = match1(html, r'(.+)丨(.+)') #title # player loaded via internal iframe frame_url = re.search(r'