mirror of
https://github.com/soimort/you-get.git
synced 2025-02-12 04:55:21 +03:00
支持一些新的参数:sortbyidx tofile beginidx
This commit is contained in:
parent
858435d503
commit
75f7eca1b1
@ -100,6 +100,7 @@ import locale
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
|
import functools
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
@ -715,7 +716,7 @@ class DummyProgressBar:
|
|||||||
def done(self):
|
def done(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def get_output_filename(urls, title, ext, output_dir, merge):
|
def get_output_filename(urls, title, ext, output_dir, merge, **kwargs):
|
||||||
# lame hack for the --output-filename option
|
# lame hack for the --output-filename option
|
||||||
global output_filename
|
global output_filename
|
||||||
if output_filename: return output_filename
|
if output_filename: return output_filename
|
||||||
@ -735,7 +736,12 @@ def get_output_filename(urls, title, ext, output_dir, merge):
|
|||||||
merged_ext = 'mkv'
|
merged_ext = 'mkv'
|
||||||
else:
|
else:
|
||||||
merged_ext = 'ts'
|
merged_ext = 'ts'
|
||||||
return '%s.%s' % (title, merged_ext)
|
|
||||||
|
index = kwargs.get('index')
|
||||||
|
if index is not None:
|
||||||
|
return '%03d_%s.%s' % (index, title, merged_ext)
|
||||||
|
else:
|
||||||
|
return '%s.%s' % (title, merged_ext)
|
||||||
|
|
||||||
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
|
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
|
||||||
assert urls
|
assert urls
|
||||||
@ -759,7 +765,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
title = tr(get_filename(title))
|
title = tr(get_filename(title))
|
||||||
output_filename = get_output_filename(urls, title, ext, output_dir, merge)
|
output_filename = get_output_filename(urls, title, ext, output_dir, merge, **kwargs)
|
||||||
output_filepath = os.path.join(output_dir, output_filename)
|
output_filepath = os.path.join(output_dir, output_filename)
|
||||||
|
|
||||||
if total_size:
|
if total_size:
|
||||||
@ -1173,6 +1179,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
|
|||||||
|
|
||||||
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:'
|
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:'
|
||||||
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
|
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
|
||||||
|
EXTRA_OPTS = 'sortbyidx tofile beginidx='.split()
|
||||||
|
opts += EXTRA_OPTS
|
||||||
if download_playlist:
|
if download_playlist:
|
||||||
short_opts = 'l' + short_opts
|
short_opts = 'l' + short_opts
|
||||||
opts = ['playlist'] + opts
|
opts = ['playlist'] + opts
|
||||||
@ -1204,6 +1212,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
|
|||||||
extractor_proxy = None
|
extractor_proxy = None
|
||||||
traceback = False
|
traceback = False
|
||||||
timeout = 600
|
timeout = 600
|
||||||
|
extra_opts = {}
|
||||||
for o, a in opts:
|
for o, a in opts:
|
||||||
if o in ('-V', '--version'):
|
if o in ('-V', '--version'):
|
||||||
version()
|
version()
|
||||||
@ -1282,13 +1291,17 @@ def script_main(script_name, download, download_playlist, **kwargs):
|
|||||||
elif o in ('-t', '--timeout'):
|
elif o in ('-t', '--timeout'):
|
||||||
timeout = int(a)
|
timeout = int(a)
|
||||||
else:
|
else:
|
||||||
log.e("try 'you-get --help' for more options")
|
oky = o.strip('-')
|
||||||
sys.exit(2)
|
if oky in EXTRA_OPTS or oky + '=' in EXTRA_OPTS:
|
||||||
|
extra_opts[oky] = a
|
||||||
|
else:
|
||||||
|
log.e("try 'you-get --help' for more options")
|
||||||
|
sys.exit(2)
|
||||||
if not args:
|
if not args:
|
||||||
print(help)
|
print(help)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
if (socks_proxy):
|
if socks_proxy:
|
||||||
try:
|
try:
|
||||||
import socket
|
import socket
|
||||||
import socks
|
import socks
|
||||||
@ -1309,46 +1322,54 @@ def script_main(script_name, download, download_playlist, **kwargs):
|
|||||||
|
|
||||||
socket.setdefaulttimeout(timeout)
|
socket.setdefaulttimeout(timeout)
|
||||||
|
|
||||||
try:
|
globals()['download_main'] = functools.partial(download_main, extra_opts=extra_opts)
|
||||||
if stream_id:
|
|
||||||
if not extractor_proxy:
|
retry_max = 10
|
||||||
download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
for retry in range(retry_max):
|
||||||
|
try:
|
||||||
|
if stream_id:
|
||||||
|
if not extractor_proxy:
|
||||||
|
download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
||||||
|
else:
|
||||||
|
download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
||||||
else:
|
else:
|
||||||
download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
if not extractor_proxy:
|
||||||
else:
|
download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
||||||
if not extractor_proxy:
|
else:
|
||||||
download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
if traceback:
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
|
sys.exit(1)
|
||||||
except KeyboardInterrupt:
|
except UnicodeEncodeError:
|
||||||
if traceback:
|
log.e('[error] oops, the current environment does not seem to support Unicode.')
|
||||||
raise
|
log.e('please set it to a UTF-8-aware locale first,')
|
||||||
|
log.e('so as to save the video (with some Unicode characters) correctly.')
|
||||||
|
log.e('you can do it like this:')
|
||||||
|
log.e(' (Windows) % chcp 65001 ')
|
||||||
|
log.e(' (Linux) $ LC_CTYPE=en_US.UTF-8')
|
||||||
|
# sys.exit(1)
|
||||||
|
except Exception:
|
||||||
|
if not traceback:
|
||||||
|
log.e('[error] oops, something went wrong.')
|
||||||
|
log.e('don\'t panic, c\'est la vie. please try the following steps:')
|
||||||
|
log.e(' (1) Rule out any network problem.')
|
||||||
|
log.e(' (2) Make sure you-get is up-to-date.')
|
||||||
|
log.e(' (3) Check if the issue is already known, on')
|
||||||
|
log.e(' https://github.com/soimort/you-get/wiki/Known-Bugs')
|
||||||
|
log.e(' https://github.com/soimort/you-get/issues')
|
||||||
|
log.e(' (4) Run the command with \'--debug\' option,')
|
||||||
|
log.e(' and report this issue with the full output.')
|
||||||
|
else:
|
||||||
|
version()
|
||||||
|
log.i(args)
|
||||||
|
import traceback
|
||||||
|
log.w(traceback.format_exc())
|
||||||
|
# raise
|
||||||
|
# sys.exit(1)
|
||||||
else:
|
else:
|
||||||
sys.exit(1)
|
break
|
||||||
except UnicodeEncodeError:
|
|
||||||
log.e('[error] oops, the current environment does not seem to support Unicode.')
|
|
||||||
log.e('please set it to a UTF-8-aware locale first,')
|
|
||||||
log.e('so as to save the video (with some Unicode characters) correctly.')
|
|
||||||
log.e('you can do it like this:')
|
|
||||||
log.e(' (Windows) % chcp 65001 ')
|
|
||||||
log.e(' (Linux) $ LC_CTYPE=en_US.UTF-8')
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception:
|
|
||||||
if not traceback:
|
|
||||||
log.e('[error] oops, something went wrong.')
|
|
||||||
log.e('don\'t panic, c\'est la vie. please try the following steps:')
|
|
||||||
log.e(' (1) Rule out any network problem.')
|
|
||||||
log.e(' (2) Make sure you-get is up-to-date.')
|
|
||||||
log.e(' (3) Check if the issue is already known, on')
|
|
||||||
log.e(' https://github.com/soimort/you-get/wiki/Known-Bugs')
|
|
||||||
log.e(' https://github.com/soimort/you-get/issues')
|
|
||||||
log.e(' (4) Run the command with \'--debug\' option,')
|
|
||||||
log.e(' and report this issue with the full output.')
|
|
||||||
else:
|
|
||||||
version()
|
|
||||||
log.i(args)
|
|
||||||
raise
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
def google_search(url):
|
def google_search(url):
|
||||||
keywords = r1(r'https?://(.*)', url)
|
keywords = r1(r'https?://(.*)', url)
|
||||||
|
@ -20,6 +20,7 @@ class Extractor():
|
|||||||
class VideoExtractor():
|
class VideoExtractor():
|
||||||
def __init__(self, *args):
|
def __init__(self, *args):
|
||||||
self.url = None
|
self.url = None
|
||||||
|
self.index = 0
|
||||||
self.title = None
|
self.title = None
|
||||||
self.vid = None
|
self.vid = None
|
||||||
self.streams = {}
|
self.streams = {}
|
||||||
@ -47,6 +48,8 @@ class VideoExtractor():
|
|||||||
except:
|
except:
|
||||||
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
|
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
|
||||||
|
|
||||||
|
self.sort_streams(**kwargs)
|
||||||
|
|
||||||
self.extract(**kwargs)
|
self.extract(**kwargs)
|
||||||
|
|
||||||
self.download(**kwargs)
|
self.download(**kwargs)
|
||||||
@ -74,6 +77,10 @@ class VideoExtractor():
|
|||||||
pass
|
pass
|
||||||
#raise NotImplementedError()
|
#raise NotImplementedError()
|
||||||
|
|
||||||
|
def sort_streams(self, **kwargs):
    """Hook that lets an extractor reorder or filter its streams.

    The base implementation is intentionally a no-op: it does not raise
    ``NotImplementedError``, so extractors that do not override it keep
    whatever ``streams_sorted`` was already computed.  All keyword
    arguments are accepted and ignored.
    """
    return None
|
||||||
|
|
||||||
def extract(self, **kwargs):
|
def extract(self, **kwargs):
|
||||||
pass
|
pass
|
||||||
#raise NotImplementedError()
|
#raise NotImplementedError()
|
||||||
@ -159,8 +166,14 @@ class VideoExtractor():
|
|||||||
print("videos:")
|
print("videos:")
|
||||||
|
|
||||||
def download(self, **kwargs):
|
def download(self, **kwargs):
|
||||||
|
if not self.streams_sorted:
|
||||||
|
# No stream is available
|
||||||
|
return
|
||||||
|
|
||||||
|
if 'index' in kwargs:
|
||||||
|
self.index = int(kwargs.get('index'))
|
||||||
if 'json_output' in kwargs and kwargs['json_output']:
|
if 'json_output' in kwargs and kwargs['json_output']:
|
||||||
json_output.output(self)
|
json_output.output(self, tofile='tofile' in kwargs.get('extra_opts'))
|
||||||
elif 'info_only' in kwargs and kwargs['info_only']:
|
elif 'info_only' in kwargs and kwargs['info_only']:
|
||||||
if 'stream_id' in kwargs and kwargs['stream_id']:
|
if 'stream_id' in kwargs and kwargs['stream_id']:
|
||||||
# Display the stream
|
# Display the stream
|
||||||
@ -205,7 +218,8 @@ class VideoExtractor():
|
|||||||
download_urls(urls, self.title, ext, total_size,
|
download_urls(urls, self.title, ext, total_size,
|
||||||
output_dir=kwargs['output_dir'],
|
output_dir=kwargs['output_dir'],
|
||||||
merge=kwargs['merge'],
|
merge=kwargs['merge'],
|
||||||
av=stream_id in self.dash_streams)
|
av=stream_id in self.dash_streams,
|
||||||
|
index=self.index)
|
||||||
if 'caption' not in kwargs or not kwargs['caption']:
|
if 'caption' not in kwargs or not kwargs['caption']:
|
||||||
print('Skipping captions.')
|
print('Skipping captions.')
|
||||||
return
|
return
|
||||||
|
@ -36,6 +36,8 @@ class YouTube(VideoExtractor):
|
|||||||
{'itag': '17', 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
|
{'itag': '17', 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
PREFER_FORMAT = 'mp4_hd720 mp4_medium'.split()
|
||||||
|
|
||||||
def decipher(js, s):
|
def decipher(js, s):
|
||||||
def tr_js(code):
|
def tr_js(code):
|
||||||
code = re.sub(r'function', r'def', code)
|
code = re.sub(r'function', r'def', code)
|
||||||
@ -125,9 +127,21 @@ class YouTube(VideoExtractor):
|
|||||||
|
|
||||||
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
|
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
|
||||||
self.p_playlist()
|
self.p_playlist()
|
||||||
|
video_list = []
|
||||||
for video in videos:
|
for video in videos:
|
||||||
vid = parse_query_param(video, 'v')
|
vid = parse_query_param(video, 'v')
|
||||||
index = parse_query_param(video, 'index')
|
index = parse_query_param(video, 'index')
|
||||||
|
video_list.append((vid, index))
|
||||||
|
|
||||||
|
if 'sortbyidx' in kwargs.get('extra_opts'):
|
||||||
|
video_list.sort(key=lambda x: int(x[1]))
|
||||||
|
|
||||||
|
beginidx = int(kwargs.get('extra_opts').get('beginidx', 0))
|
||||||
|
|
||||||
|
for i, (vid, index) in enumerate(video_list):
|
||||||
|
if i < beginidx - 1:
|
||||||
|
continue
|
||||||
|
log.i('%d) index=%s, vid=%s' % (i+1, index, vid))
|
||||||
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
|
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
|
||||||
|
|
||||||
def prepare(self, **kwargs):
|
def prepare(self, **kwargs):
|
||||||
@ -201,13 +215,18 @@ class YouTube(VideoExtractor):
|
|||||||
#stream_list = []
|
#stream_list = []
|
||||||
|
|
||||||
elif video_info['errorcode'] == ['100']:
|
elif video_info['errorcode'] == ['100']:
|
||||||
log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
|
# log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
|
||||||
|
log.w('[Failed] This video does not exist.')
|
||||||
|
return
|
||||||
else:
|
else:
|
||||||
log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
|
# log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
|
||||||
|
log.w('[Failed] %s' % video_info['reason'][0])
|
||||||
|
return
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.wtf('[Failed] Invalid status.')
|
# log.wtf('[Failed] Invalid status.')
|
||||||
|
log.w('[Failed] Invalid status.')
|
||||||
|
return
|
||||||
|
|
||||||
for stream in stream_list:
|
for stream in stream_list:
|
||||||
metadata = parse.parse_qs(stream)
|
metadata = parse.parse_qs(stream)
|
||||||
@ -379,6 +398,18 @@ class YouTube(VideoExtractor):
|
|||||||
'size': int(dash_size) + int(dash_webm_a_size)
|
'size': int(dash_size) + int(dash_webm_a_size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def sort_streams(self, **kwargs):
    """Fill ``self.streams_sorted`` with the preferred-format streams.

    Every entry of ``self.streams`` (a dict keyed by itag) is labelled with
    a ``taginfo`` string of the form ``<container>_<quality>``.  Only
    streams whose label appears in ``self.PREFER_FORMAT`` are kept, and they
    are ordered by their position in that list (most preferred first).

    The dicts stored in ``self.streams_sorted`` are copies that carry the
    additional ``itag`` and ``taginfo`` keys; the entries of
    ``self.streams`` are left unmodified.  A stream that has no
    ``container`` or ``quality`` key cannot be matched against a preference
    and is skipped instead of raising ``KeyError``.

    Keyword arguments are accepted for interface compatibility and ignored.
    """
    # Preference rank per label; setdefault keeps the first occurrence,
    # which is what list.index() would have returned.
    rank = {}
    for position, label in enumerate(self.PREFER_FORMAT):
        rank.setdefault(label, position)

    candidates = []
    for itag, stream in self.streams.items():
        if 'container' not in stream or 'quality' not in stream:
            continue
        taginfo = '%s_%s' % (stream['container'], stream['quality'])
        if taginfo not in rank:
            continue
        # Copy the entry itself: copying only the outer dict would be a
        # shallow copy and the writes below would leak into self.streams.
        tagged = dict(stream)
        tagged['itag'] = itag
        tagged['taginfo'] = taginfo
        candidates.append(tagged)

    # sorted() is stable, so streams sharing a label keep their dict order.
    self.streams_sorted = sorted(candidates, key=lambda s: rank[s['taginfo']])
|
||||||
|
|
||||||
|
|
||||||
def extract(self, **kwargs):
|
def extract(self, **kwargs):
|
||||||
if not self.streams_sorted:
|
if not self.streams_sorted:
|
||||||
# No stream is available
|
# No stream is available
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
|
import os
|
||||||
import json
|
import json
|
||||||
|
|
||||||
# save info from common.print_info()
|
# save info from common.print_info()
|
||||||
last_info = None
|
last_info = None
|
||||||
|
|
||||||
def output(video_extractor, pretty_print=True):
|
def output(video_extractor, pretty_print=True, tofile=False):
|
||||||
ve = video_extractor
|
ve = video_extractor
|
||||||
out = {}
|
out = {}
|
||||||
out['url'] = ve.url
|
out['url'] = ve.url
|
||||||
@ -12,9 +12,20 @@ def output(video_extractor, pretty_print=True):
|
|||||||
out['site'] = ve.name
|
out['site'] = ve.name
|
||||||
out['streams'] = ve.streams
|
out['streams'] = ve.streams
|
||||||
if pretty_print:
|
if pretty_print:
|
||||||
print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
|
json_content = json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)
|
||||||
else:
|
else:
|
||||||
print(json.dumps(out))
|
json_content = json.dumps(out)
|
||||||
|
if tofile:
|
||||||
|
jsondir = 'json'
|
||||||
|
if not os.path.exists(jsondir):
|
||||||
|
os.mkdir(jsondir)
|
||||||
|
filename = '%s/%03d_%s.json' % (jsondir, ve.index, ve.title)
|
||||||
|
f = open(filename, 'wb')
|
||||||
|
_output = f.write
|
||||||
|
json_content = json_content.encode('utf8')
|
||||||
|
else:
|
||||||
|
_output = print
|
||||||
|
_output(json_content)
|
||||||
|
|
||||||
# a fake VideoExtractor object to save info
|
# a fake VideoExtractor object to save info
|
||||||
class VideoExtractor(object):
|
class VideoExtractor(object):
|
||||||
|
Loading…
Reference in New Issue
Block a user