支持一些新的参数:sortbyidx tofile beginidx

This commit is contained in:
DDGG 2017-06-09 02:16:52 +08:00
parent 858435d503
commit 75f7eca1b1
4 changed files with 130 additions and 53 deletions

View File

@ -100,6 +100,7 @@ import locale
import logging
import os
import platform
import functools
import re
import socket
import sys
@ -715,7 +716,7 @@ class DummyProgressBar:
def done(self):
    """Finalize the (dummy) progress bar — intentionally a no-op."""
def get_output_filename(urls, title, ext, output_dir, merge):
def get_output_filename(urls, title, ext, output_dir, merge, **kwargs):
# lame hack for the --output-filename option
global output_filename
if output_filename: return output_filename
@ -735,6 +736,11 @@ def get_output_filename(urls, title, ext, output_dir, merge):
merged_ext = 'mkv'
else:
merged_ext = 'ts'
index = kwargs.get('index')
if index is not None:
return '%03d_%s.%s' % (index, title, merged_ext)
else:
return '%s.%s' % (title, merged_ext)
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
@ -759,7 +765,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
pass
title = tr(get_filename(title))
output_filename = get_output_filename(urls, title, ext, output_dir, merge)
output_filename = get_output_filename(urls, title, ext, output_dir, merge, **kwargs)
output_filepath = os.path.join(output_dir, output_filename)
if total_size:
@ -1173,6 +1179,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
EXTRA_OPTS = 'sortbyidx tofile beginidx='.split()
opts += EXTRA_OPTS
if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
@ -1204,6 +1212,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
extractor_proxy = None
traceback = False
timeout = 600
extra_opts = {}
for o, a in opts:
if o in ('-V', '--version'):
version()
@ -1281,6 +1290,10 @@ def script_main(script_name, download, download_playlist, **kwargs):
lang = a
elif o in ('-t', '--timeout'):
timeout = int(a)
else:
oky = o.strip('-')
if oky in EXTRA_OPTS or oky + '=' in EXTRA_OPTS:
extra_opts[oky] = a
else:
log.e("try 'you-get --help' for more options")
sys.exit(2)
@ -1288,7 +1301,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
print(help)
sys.exit()
if (socks_proxy):
if socks_proxy:
try:
import socket
import socks
@ -1309,6 +1322,10 @@ def script_main(script_name, download, download_playlist, **kwargs):
socket.setdefaulttimeout(timeout)
globals()['download_main'] = functools.partial(download_main, extra_opts=extra_opts)
retry_max = 10
for retry in range(retry_max):
try:
if stream_id:
if not extractor_proxy:
@ -1332,7 +1349,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
log.e('you can do it like this:')
log.e(' (Windows) % chcp 65001 ')
log.e(' (Linux) $ LC_CTYPE=en_US.UTF-8')
sys.exit(1)
# sys.exit(1)
except Exception:
if not traceback:
log.e('[error] oops, something went wrong.')
@ -1347,8 +1364,12 @@ def script_main(script_name, download, download_playlist, **kwargs):
else:
version()
log.i(args)
raise
sys.exit(1)
import traceback
log.w(traceback.format_exc())
# raise
# sys.exit(1)
else:
break
def google_search(url):
keywords = r1(r'https?://(.*)', url)

View File

@ -20,6 +20,7 @@ class Extractor():
class VideoExtractor():
def __init__(self, *args):
self.url = None
self.index = 0
self.title = None
self.vid = None
self.streams = {}
@ -47,6 +48,8 @@ class VideoExtractor():
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.sort_streams(**kwargs)
self.extract(**kwargs)
self.download(**kwargs)
@ -74,6 +77,10 @@ class VideoExtractor():
pass
#raise NotImplementedError()
def sort_streams(self, **kwargs):
    """Order the extracted streams by preference.

    Base-class hook: does nothing. Subclasses (e.g. the YouTube
    extractor) override this to populate ``self.streams_sorted``.
    """
def extract(self, **kwargs):
    """Extract download information for the selected stream(s).

    Base-class hook: does nothing. Concrete extractors override this.
    """
@ -159,8 +166,14 @@ class VideoExtractor():
print("videos:")
def download(self, **kwargs):
if not self.streams_sorted:
# No stream is available
return
if 'index' in kwargs:
self.index = int(kwargs.get('index'))
if 'json_output' in kwargs and kwargs['json_output']:
json_output.output(self)
json_output.output(self, tofile='tofile' in kwargs.get('extra_opts'))
elif 'info_only' in kwargs and kwargs['info_only']:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Display the stream
@ -205,7 +218,8 @@ class VideoExtractor():
download_urls(urls, self.title, ext, total_size,
output_dir=kwargs['output_dir'],
merge=kwargs['merge'],
av=stream_id in self.dash_streams)
av=stream_id in self.dash_streams,
index=self.index)
if 'caption' not in kwargs or not kwargs['caption']:
print('Skipping captions.')
return

View File

@ -36,6 +36,8 @@ class YouTube(VideoExtractor):
{'itag': '17', 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
]
PREFER_FORMAT = 'mp4_hd720 mp4_medium'.split()
def decipher(js, s):
def tr_js(code):
code = re.sub(r'function', r'def', code)
@ -125,9 +127,21 @@ class YouTube(VideoExtractor):
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
self.p_playlist()
video_list = []
for video in videos:
vid = parse_query_param(video, 'v')
index = parse_query_param(video, 'index')
video_list.append((vid, index))
if 'sortbyidx' in kwargs.get('extra_opts'):
video_list.sort(key=lambda x: int(x[1]))
beginidx = int(kwargs.get('extra_opts').get('beginidx', 0))
for i, (vid, index) in enumerate(video_list):
if i < beginidx - 1:
continue
log.i('%d) index=%s, vid=%s' % (i+1, index, vid))
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
def prepare(self, **kwargs):
@ -201,13 +215,18 @@ class YouTube(VideoExtractor):
#stream_list = []
elif video_info['errorcode'] == ['100']:
log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
# log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
log.w('[Failed] This video does not exist.')
return
else:
# log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
log.w('[Failed] %s' % video_info['reason'][0])
return
else:
log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
else:
log.wtf('[Failed] Invalid status.')
# log.wtf('[Failed] Invalid status.')
log.w('[Failed] Invalid status.')
return
for stream in stream_list:
metadata = parse.parse_qs(stream)
@ -379,6 +398,18 @@ class YouTube(VideoExtractor):
'size': int(dash_size) + int(dash_webm_a_size)
}
def sort_streams(self, **kwargs):
    """Populate ``self.streams_sorted`` with the preferred streams.

    Each stream dict is tagged in place with its ``itag`` and a
    ``'<container>_<quality>'`` label under ``'taginfo'``; only streams
    whose label appears in ``PREFER_FORMAT`` are kept, ordered by their
    position in ``PREFER_FORMAT`` (most preferred first).
    """
    labelled = []
    for tag, info in self.streams.copy().items():
        info['itag'] = tag
        info['taginfo'] = '%s_%s' % (info['container'], info['quality'])
        labelled.append(info)
    # Keep only preferred formats, ranked by PREFER_FORMAT order
    # (list.sort is stable, so ties keep their original ordering).
    preferred = [s for s in labelled if s['taginfo'] in self.PREFER_FORMAT]
    preferred.sort(key=lambda s: self.PREFER_FORMAT.index(s['taginfo']))
    self.streams_sorted = preferred
def extract(self, **kwargs):
if not self.streams_sorted:
# No stream is available

View File

@ -1,10 +1,10 @@
import os
import json
# save info from common.print_info()
last_info = None
def output(video_extractor, pretty_print=True):
def output(video_extractor, pretty_print=True, tofile=False):
ve = video_extractor
out = {}
out['url'] = ve.url
@ -12,9 +12,20 @@ def output(video_extractor, pretty_print=True):
out['site'] = ve.name
out['streams'] = ve.streams
if pretty_print:
print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
json_content = json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)
else:
print(json.dumps(out))
json_content = json.dumps(out)
if tofile:
jsondir = 'json'
if not os.path.exists(jsondir):
os.mkdir(jsondir)
filename = '%s/%03d_%s.json' % (jsondir, ve.index, ve.title)
f = open(filename, 'wb')
_output = f.write
json_content = json_content.encode('utf8')
else:
_output = print
_output(json_content)
# a fake VideoExtractor object to save info
class VideoExtractor(object):