you-get/src/you_get/extractor.py
2015-10-18 01:35:54 +02:00

213 lines
8.1 KiB
Python

#!/usr/bin/env python
from .common import match1, download_urls, get_filename, parse_host, set_proxy, unset_proxy
from .util import log
from . import json_output
import os
class Extractor():
def __init__(self, *args):
self.url = None
self.title = None
self.vid = None
self.streams = {}
self.streams_sorted = []
if args:
self.url = args[0]
class VideoExtractor():
def __init__(self, *args):
self.url = None
self.title = None
self.vid = None
self.streams = {}
self.streams_sorted = []
self.audiolang = None
self.password_protected = False
self.dash_streams = {}
self.caption_tracks = {}
if args:
self.url = args[0]
def download_by_url(self, url, **kwargs):
self.url = url
if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
set_proxy(parse_host(kwargs['extractor_proxy']))
self.prepare(**kwargs)
if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
unset_proxy()
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
self.download(**kwargs)
def download_by_vid(self, vid, **kwargs):
self.vid = vid
if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
set_proxy(parse_host(kwargs['extractor_proxy']))
self.prepare(**kwargs)
if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
unset_proxy()
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
self.download(**kwargs)
def prepare(self, **kwargs):
pass
#raise NotImplementedError()
def extract(self, **kwargs):
pass
#raise NotImplementedError()
def p_stream(self, stream_id):
if stream_id in self.streams:
stream = self.streams[stream_id]
else:
stream = self.dash_streams[stream_id]
if 'itag' in stream:
print(" - itag: %s" % log.sprint(stream_id, log.NEGATIVE))
else:
print(" - format: %s" % log.sprint(stream_id, log.NEGATIVE))
if 'container' in stream:
print(" container: %s" % stream['container'])
if 'video_profile' in stream:
print(" video-profile: %s" % stream['video_profile'])
if 'quality' in stream:
print(" quality: %s" % stream['quality'])
if 'size' in stream:
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
if 'itag' in stream:
print(" # download-with: %s" % log.sprint("you-get --itag=%s [URL]" % stream_id, log.UNDERLINE))
else:
print(" # download-with: %s" % log.sprint("you-get --format=%s [URL]" % stream_id, log.UNDERLINE))
print()
def p_i(self, stream_id):
stream = self.streams[stream_id]
print(" - title: %s" % self.title)
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
print(" url: %s" % self.url)
print()
def p(self, stream_id=None):
print("site: %s" % self.__class__.name)
print("title: %s" % self.title)
if stream_id:
# Print the stream
print("stream:")
self.p_stream(stream_id)
elif stream_id is None:
# Print stream with best quality
print("stream: # Best quality")
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
self.p_stream(stream_id)
elif stream_id == []:
print("streams: # Available quality and codecs")
# Print DASH streams
if self.dash_streams:
print(" [ DASH ] %s" % ('_' * 36))
for stream in self.dash_streams:
self.p_stream(stream)
# Print all other available streams
print(" [ DEFAULT ] %s" % ('_' * 33))
for stream in self.streams_sorted:
self.p_stream(stream['id'] if 'id' in stream else stream['itag'])
if self.audiolang:
print("audio-languages:")
for i in self.audiolang:
print(" - lang: {}".format(i['lang']))
print(" download-url: {}\n".format(i['url']))
def p_playlist(self, stream_id=None):
print("site: %s" % self.__class__.name)
print("playlist: %s" % self.title)
print("videos:")
def download(self, **kwargs):
if 'json_output' in kwargs and kwargs['json_output']:
json_output.output(self)
elif 'info_only' in kwargs and kwargs['info_only']:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Display the stream
stream_id = kwargs['stream_id']
if 'index' not in kwargs:
self.p(stream_id)
else:
self.p_i(stream_id)
else:
# Display all available streams
if 'index' not in kwargs:
self.p([])
else:
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
self.p_i(stream_id)
else:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Download the stream
stream_id = kwargs['stream_id']
else:
# Download stream with the best quality
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
if 'index' not in kwargs:
self.p(stream_id)
else:
self.p_i(stream_id)
if stream_id in self.streams:
urls = self.streams[stream_id]['src']
ext = self.streams[stream_id]['container']
total_size = self.streams[stream_id]['size']
else:
urls = self.dash_streams[stream_id]['src']
ext = self.dash_streams[stream_id]['container']
total_size = self.dash_streams[stream_id]['size']
if not urls:
log.wtf('[Failed] Cannot extract video source.')
# For legacy main()
download_urls(urls, self.title, ext, total_size,
output_dir=kwargs['output_dir'],
merge=kwargs['merge'],
av=stream_id in self.dash_streams)
for lang in self.caption_tracks:
filename = '%s.%s.srt' % (get_filename(self.title), lang)
print('Saving %s ...' % filename, end="", flush=True)
srt = self.caption_tracks[lang]
with open(os.path.join(kwargs['output_dir'], filename),
'w', encoding='utf-8') as x:
x.write(srt)
print('Done.')
# For main_dev()
#download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])
self.__init__()