tightening up code

This commit is contained in:
cclauss 2016-04-05 12:24:43 +02:00
parent 53cb97def5
commit 546457dea2

View File

@ -123,10 +123,8 @@ fake_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0' 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
} }
# Use the terminal's own encoding when stdout is a TTY; otherwise (pipe or
# file redirect) fall back to the locale's preferred encoding.
if sys.stdout.isatty():
    default_encoding = sys.stdout.encoding.lower()
else:
    default_encoding = locale.getpreferredencoding().lower()
def maybe_print(*s): def maybe_print(*s):
try: print(*s) try: print(*s)
@ -167,10 +165,7 @@ def match1(text, *patterns):
if len(patterns) == 1: if len(patterns) == 1:
pattern = patterns[0] pattern = patterns[0]
match = re.search(pattern, text) match = re.search(pattern, text)
if match: return match.group(1) if match else None
return match.group(1)
else:
return None
else: else:
ret = [] ret = []
for pattern in patterns: for pattern in patterns:
@ -190,12 +185,7 @@ def matchall(text, patterns):
a list if matched. empty if not. a list if matched. empty if not.
""" """
ret = [] return [re.findall(pattern, text) for pattern in patterns]
for pattern in patterns:
match = re.findall(pattern, text)
ret += match
return ret
def launch_player(player, urls): def launch_player(player, urls):
import subprocess import subprocess
@ -223,20 +213,14 @@ def unicodize(text):
# DEPRECATED in favor of util.legitimize() # DEPRECATED in favor of util.legitimize()
def escape_file_path(path):
    r"""Return ``path`` with each ``/``, ``\``, ``*`` and ``?`` replaced by ``-``."""
    # A single translate() pass is equivalent to four chained replace() calls.
    return path.translate(str.maketrans('/\\*?', '----'))
def ungzip(data):
    """Decompresses data for Content-Encoding: gzip.

    Takes the compressed payload as bytes and returns the decompressed bytes.
    """
    from io import BytesIO
    import gzip
    # The context manager closes the GzipFile once the payload has been read.
    with gzip.GzipFile(fileobj=BytesIO(data)) as f:
        return f.read()
def undeflate(data): def undeflate(data):
"""Decompresses data for Content-Encoding: deflate. """Decompresses data for Content-Encoding: deflate.
@ -259,27 +243,23 @@ def get_response(url, faker = False):
response = request.urlopen(url) response = request.urlopen(url)
data = response.read() data = response.read()
if response.info().get('Content-Encoding') == 'gzip': content_encoding = response.info().get('Content-Encoding')
data = ungzip(data) func = {'gzip': ungzip, 'deflate': undeflate}.get(content_encoding, None)
elif response.info().get('Content-Encoding') == 'deflate': if func:
data = undeflate(data) data = func(data)
response.data = data response.data = data
return response return response
# DEPRECATED in favor of get_content() # DEPRECATED in favor of get_content()
def get_html(url, encoding = None, faker = False):
    """Fetch ``url`` with get_response() and return its body as text.

    The body is always decoded as UTF-8 with undecodable bytes dropped.
    ``encoding`` is accepted for backward compatibility but is not used.
    ``faker`` is passed through to get_response().
    """
    content = get_response(url, faker).data
    return str(content, 'utf-8', 'ignore')
# DEPRECATED in favor of get_content() # DEPRECATED in favor of get_content()
def get_decoded_html(url, faker = False):
    """Fetch ``url`` and decode the body using the charset in Content-Type.

    Returns a str when the Content-Type header names a charset (undecodable
    bytes are dropped); otherwise returns the body data unchanged.
    """
    response = get_response(url, faker)
    data = response.data
    # A response without a Content-Type header yields None here; substitute
    # an empty string so the pattern search does not receive None.
    content_type = response.headers['content-type'] or ''
    charset = r1(r'charset=([\w-]+)', content_type)
    if charset:
        return data.decode(charset, 'ignore')
    return data
def get_location(url): def get_location(url):
response = request.urlopen(url) response = request.urlopen(url)
@ -310,26 +290,21 @@ def get_content(url, headers={}, decoded=True):
# Handle HTTP compression for gzip and deflate (zlib) # Handle HTTP compression for gzip and deflate (zlib)
content_encoding = response.getheader('Content-Encoding') content_encoding = response.getheader('Content-Encoding')
if content_encoding == 'gzip': func = {'gzip': ungzip, 'deflate': undeflate}.get(content_encoding, None)
data = ungzip(data) if func:
elif content_encoding == 'deflate': data = func(data)
data = undeflate(data)
# Decode the response body # Decode the response body
if decoded: if decoded:
charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)') charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
if charset is not None: data = data.decode(charset or 'utf-8')
data = data.decode(charset)
else:
data = data.decode('utf-8')
return data return data
def url_size(url, faker = False, headers = {}): def url_size(url, faker = False, headers = {}):
if faker: headers = fake_headers if faker else headers
response = request.urlopen(request.Request(url, headers = fake_headers), None) if headers:
elif headers: response = request.urlopen(request.Request(url, headers=headers), None)
response = request.urlopen(request.Request(url, headers = headers), None)
else: else:
response = request.urlopen(url) response = request.urlopen(url)
@ -340,18 +315,14 @@ def urls_size(urls, faker = False, headers = {}):
return sum([url_size(url, faker=faker, headers=headers) for url in urls]) return sum([url_size(url, faker=faker, headers=headers) for url in urls])
def get_head(url, headers = {}):
    """Issue an HTTP HEAD request for ``url`` and return the response headers.

    ``headers`` holds optional request headers; an empty or None value sends
    none. The response headers are returned as a plain dict.
    """
    # Request() accepts an empty header mapping, so no branch is needed.
    req = request.Request(url, headers=headers or {})
    # Override the method so urlopen() sends HEAD instead of GET.
    req.get_method = lambda: 'HEAD'
    res = request.urlopen(req)
    return dict(res.headers)
def url_info(url, faker = False, headers = {}): def url_info(url, faker = False, headers = {}):
if faker: headers = fake_headers if faker else headers
response = request.urlopen(request.Request(url, headers = fake_headers), None) if headers:
elif headers:
response = request.urlopen(request.Request(url, headers = headers), None) response = request.urlopen(request.Request(url, headers = headers), None)
else: else:
response = request.urlopen(request.Request(url)) response = request.urlopen(request.Request(url))
@ -359,7 +330,8 @@ def url_info(url, faker = False, headers = {}):
headers = response.headers headers = response.headers
type = headers['content-type'] type = headers['content-type']
if type == 'image/jpg; charset=UTF-8' or type == 'image/jpg' : type = 'audio/mpeg' #fix for netease if type in ('image/jpg; charset=UTF-8', 'image/jpg'):
type = 'audio/mpeg' #fix for netease
mapping = { mapping = {
'video/3gpp': '3gp', 'video/3gpp': '3gp',
'video/f4v': 'flv', 'video/f4v': 'flv',
@ -376,9 +348,8 @@ def url_info(url, faker = False, headers = {}):
'image/gif': 'gif', 'image/gif': 'gif',
'application/pdf': 'pdf', 'application/pdf': 'pdf',
} }
if type in mapping: ext = mapping.get(type, None)
ext = mapping[type] if not ext:
else:
type = None type = None
if headers['content-disposition']: if headers['content-disposition']:
try: try:
@ -400,15 +371,13 @@ def url_info(url, faker = False, headers = {}):
return type, ext, size return type, ext, size
def url_locations(urls, faker = False, headers = {}):
    """Open every URL in ``urls`` and return the list of final response URLs.

    When ``faker`` is true the module-level ``fake_headers`` are sent;
    otherwise ``headers`` are sent (none if empty). The result keeps the
    order of ``urls``.
    """
    # Request() accepts an empty header mapping, so one code path suffices.
    req_headers = fake_headers if faker else (headers or {})
    locations = []
    for url in urls:
        response = request.urlopen(request.Request(url, headers=req_headers))
        # response.url is the address actually retrieved by urlopen().
        locations.append(response.url)
    return locations
@ -446,12 +415,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
open_mode = 'wb' open_mode = 'wb'
if received < file_size: if received < file_size:
if faker: headers = fake_headers if faker else (headers or {})
headers = fake_headers
elif headers:
headers = headers
else:
headers = {}
if received: if received:
headers['Range'] = 'bytes=' + str(received) + '-' headers['Range'] = 'bytes=' + str(received) + '-'
if refer: if refer:
@ -523,12 +487,7 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
else: else:
open_mode = 'wb' open_mode = 'wb'
if faker: headers = fake_headers if faker else (headers or {})
headers = fake_headers
elif headers:
headers = headers
else:
headers = {}
if received: if received:
headers['Range'] = 'bytes=' + str(received) + '-' headers['Range'] = 'bytes=' + str(received) + '-'
if refer: if refer:
@ -576,8 +535,7 @@ class SimpleProgressBar:
self.displayed = True self.displayed = True
bar_size = self.bar_size bar_size = self.bar_size
percent = round(self.received * 100 / self.total_size, 1) percent = round(self.received * 100 / self.total_size, 1)
# Cap the displayed percentage at 100. This must be min(): max() would
# force every value up to at least 100 and the bar would always look full.
percent = min(percent, 100)
dots = bar_size * int(percent) // 100 dots = bar_size * int(percent) // 100
plus = int(percent) - dots // bar_size * 100 plus = int(percent) - dots // bar_size * 100
if plus > 0.8: if plus > 0.8:
@ -659,17 +617,11 @@ def get_output_filename(urls, title, ext, output_dir, merge):
if (len(urls) > 1) and merge: if (len(urls) > 1) and merge:
from .processor.ffmpeg import has_ffmpeg_installed from .processor.ffmpeg import has_ffmpeg_installed
if ext in ['flv', 'f4v']: if ext in ['flv', 'f4v']:
if has_ffmpeg_installed(): merged_ext = 'mp4' if has_ffmpeg_installed() else 'flv'
merged_ext = 'mp4'
else:
merged_ext = 'flv'
elif ext == 'mp4': elif ext == 'mp4':
merged_ext = 'mp4' merged_ext = 'mp4'
elif ext == 'ts': elif ext == 'ts':
if has_ffmpeg_installed(): merged_ext = 'mkv' if has_ffmpeg_installed() else 'ts'
merged_ext = 'mkv'
else:
merged_ext = 'ts'
return '%s.%s' % (title, merged_ext) return '%s.%s' % (title, merged_ext)
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs): def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
@ -699,8 +651,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
if total_size: if total_size:
if not force and os.path.exists(output_filepath) and os.path.getsize(output_filepath) >= total_size * 0.9: if not force and os.path.exists(output_filepath) and os.path.getsize(output_filepath) >= total_size * 0.9:
print('Skipping %s: file already exists' % output_filepath) print('Skipping %s: file already exists\n' % output_filepath)
print()
return return
bar = SimpleProgressBar(total_size, len(urls)) bar = SimpleProgressBar(total_size, len(urls))
else: else:
@ -729,7 +680,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
print() print()
return return
if 'av' in kwargs and kwargs['av']: if kwargs.get('av', None):
from .processor.ffmpeg import has_ffmpeg_installed from .processor.ffmpeg import has_ffmpeg_installed
if has_ffmpeg_installed(): if has_ffmpeg_installed():
from .processor.ffmpeg import ffmpeg_concat_av from .processor.ffmpeg import ffmpeg_concat_av
@ -738,7 +689,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
if ret == 0: if ret == 0:
for part in parts: os.remove(part) for part in parts: os.remove(part)
elif ext in ['flv', 'f4v']: elif ext in ('flv', 'f4v'):
try: try:
from .processor.ffmpeg import has_ffmpeg_installed from .processor.ffmpeg import has_ffmpeg_installed
if has_ffmpeg_installed(): if has_ffmpeg_installed():
@ -974,10 +925,7 @@ def mime_to_container(mime):
'video/webm': 'webm', 'video/webm': 'webm',
'video/x-flv': 'flv', 'video/x-flv': 'flv',
} }
if mime in mapping: return mapping.get(mime, mime.split('/')[1])
return mapping[mime]
else:
return mime.split('/')[1]
def parse_host(host): def parse_host(host):
"""Parses host name and port number from a string. """Parses host name and port number from a string.
@ -1226,8 +1174,7 @@ def google_search(url):
durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs] durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
print("Google Videos search:") print("Google Videos search:")
for v in zip(videos, durs): for v in zip(videos, durs):
print("- video: %s [%s]" % (unescape_html(v[0][1]), print("- video: %s [%s]" % (unescape_html(v[0][1]), v[1] or '?'))
v[1] if v[1] else '?'))
print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE)) print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE))
print() print()
print("Best matched result:") print("Best matched result:")