mirror of
https://github.com/soimort/you-get.git
synced 2025-02-11 20:52:31 +03:00
tightening up code
This commit is contained in:
parent
53cb97def5
commit
546457dea2
@ -123,10 +123,8 @@ fake_headers = {
|
|||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
|
||||||
}
|
}
|
||||||
|
|
||||||
if sys.stdout.isatty():
|
|
||||||
default_encoding = sys.stdout.encoding.lower()
|
default_encoding = (sys.stdout.encoding if sys.stdout.isatty() else locale.getpreferredencoding()).lower()
|
||||||
else:
|
|
||||||
default_encoding = locale.getpreferredencoding().lower()
|
|
||||||
|
|
||||||
def maybe_print(*s):
|
def maybe_print(*s):
|
||||||
try: print(*s)
|
try: print(*s)
|
||||||
@ -167,10 +165,7 @@ def match1(text, *patterns):
|
|||||||
if len(patterns) == 1:
|
if len(patterns) == 1:
|
||||||
pattern = patterns[0]
|
pattern = patterns[0]
|
||||||
match = re.search(pattern, text)
|
match = re.search(pattern, text)
|
||||||
if match:
|
return match.group(1) if match else None
|
||||||
return match.group(1)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
else:
|
else:
|
||||||
ret = []
|
ret = []
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
@ -190,12 +185,7 @@ def matchall(text, patterns):
|
|||||||
a list if matched. empty if not.
|
a list if matched. empty if not.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ret = []
|
return [re.findall(pattern, text) for pattern in patterns]
|
||||||
for pattern in patterns:
|
|
||||||
match = re.findall(pattern, text)
|
|
||||||
ret += match
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def launch_player(player, urls):
|
def launch_player(player, urls):
|
||||||
import subprocess
|
import subprocess
|
||||||
@ -223,20 +213,14 @@ def unicodize(text):
|
|||||||
|
|
||||||
# DEPRECATED in favor of util.legitimize()
|
# DEPRECATED in favor of util.legitimize()
|
||||||
def escape_file_path(path):
|
def escape_file_path(path):
|
||||||
path = path.replace('/', '-')
|
return path.replace('/', '-').replace('\\', '-').replace('*', '-').replace('?', '-')
|
||||||
path = path.replace('\\', '-')
|
|
||||||
path = path.replace('*', '-')
|
|
||||||
path = path.replace('?', '-')
|
|
||||||
return path
|
|
||||||
|
|
||||||
def ungzip(data):
|
def ungzip(data):
|
||||||
"""Decompresses data for Content-Encoding: gzip.
|
"""Decompresses data for Content-Encoding: gzip.
|
||||||
"""
|
"""
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import gzip
|
import gzip
|
||||||
buffer = BytesIO(data)
|
return gzip.GzipFile(fileobj=BytesIO(data)).read()
|
||||||
f = gzip.GzipFile(fileobj=buffer)
|
|
||||||
return f.read()
|
|
||||||
|
|
||||||
def undeflate(data):
|
def undeflate(data):
|
||||||
"""Decompresses data for Content-Encoding: deflate.
|
"""Decompresses data for Content-Encoding: deflate.
|
||||||
@ -259,27 +243,23 @@ def get_response(url, faker = False):
|
|||||||
response = request.urlopen(url)
|
response = request.urlopen(url)
|
||||||
|
|
||||||
data = response.read()
|
data = response.read()
|
||||||
if response.info().get('Content-Encoding') == 'gzip':
|
content_encoding = response.info().get('Content-Encoding')
|
||||||
data = ungzip(data)
|
func = {'gzip': ungzip, 'deflate': undeflate}.get(content_encoding, None)
|
||||||
elif response.info().get('Content-Encoding') == 'deflate':
|
if func:
|
||||||
data = undeflate(data)
|
data = func(data)
|
||||||
response.data = data
|
response.data = data
|
||||||
return response
|
return response
|
||||||
|
|
||||||
# DEPRECATED in favor of get_content()
|
# DEPRECATED in favor of get_content()
|
||||||
def get_html(url, encoding = None, faker = False):
|
def get_html(url, encoding = None, faker = False):
|
||||||
content = get_response(url, faker).data
|
return str(get_response(url, faker).data, 'utf-8', 'ignore')
|
||||||
return str(content, 'utf-8', 'ignore')
|
|
||||||
|
|
||||||
# DEPRECATED in favor of get_content()
|
# DEPRECATED in favor of get_content()
|
||||||
def get_decoded_html(url, faker = False):
|
def get_decoded_html(url, faker = False):
|
||||||
response = get_response(url, faker)
|
response = get_response(url, faker)
|
||||||
data = response.data
|
data = response.data
|
||||||
charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
|
charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
|
||||||
if charset:
|
return data.decode(charset, 'ignore') if charset else data
|
||||||
return data.decode(charset, 'ignore')
|
|
||||||
else:
|
|
||||||
return data
|
|
||||||
|
|
||||||
def get_location(url):
|
def get_location(url):
|
||||||
response = request.urlopen(url)
|
response = request.urlopen(url)
|
||||||
@ -310,26 +290,21 @@ def get_content(url, headers={}, decoded=True):
|
|||||||
|
|
||||||
# Handle HTTP compression for gzip and deflate (zlib)
|
# Handle HTTP compression for gzip and deflate (zlib)
|
||||||
content_encoding = response.getheader('Content-Encoding')
|
content_encoding = response.getheader('Content-Encoding')
|
||||||
if content_encoding == 'gzip':
|
func = {'gzip': ungzip, 'deflate': undeflate}.get(content_encoding, None)
|
||||||
data = ungzip(data)
|
if func:
|
||||||
elif content_encoding == 'deflate':
|
data = func(data)
|
||||||
data = undeflate(data)
|
|
||||||
|
|
||||||
# Decode the response body
|
# Decode the response body
|
||||||
if decoded:
|
if decoded:
|
||||||
charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
|
charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
|
||||||
if charset is not None:
|
data = data.decode(charset or 'utf-8')
|
||||||
data = data.decode(charset)
|
|
||||||
else:
|
|
||||||
data = data.decode('utf-8')
|
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def url_size(url, faker = False, headers = {}):
|
def url_size(url, faker = False, headers = {}):
|
||||||
if faker:
|
headers = fake_headers if faker else headers
|
||||||
response = request.urlopen(request.Request(url, headers = fake_headers), None)
|
if headers:
|
||||||
elif headers:
|
response = request.urlopen(request.Request(url, headers=headers), None)
|
||||||
response = request.urlopen(request.Request(url, headers = headers), None)
|
|
||||||
else:
|
else:
|
||||||
response = request.urlopen(url)
|
response = request.urlopen(url)
|
||||||
|
|
||||||
@ -340,18 +315,14 @@ def urls_size(urls, faker = False, headers = {}):
|
|||||||
return sum([url_size(url, faker=faker, headers=headers) for url in urls])
|
return sum([url_size(url, faker=faker, headers=headers) for url in urls])
|
||||||
|
|
||||||
def get_head(url, headers = {}):
|
def get_head(url, headers = {}):
|
||||||
if headers:
|
req = request.Request(url, headers = headers) if headers else request.Request(url)
|
||||||
req = request.Request(url, headers = headers)
|
|
||||||
else:
|
|
||||||
req = request.Request(url)
|
|
||||||
req.get_method = lambda : 'HEAD'
|
req.get_method = lambda : 'HEAD'
|
||||||
res = request.urlopen(req)
|
res = request.urlopen(req)
|
||||||
return dict(res.headers)
|
return dict(res.headers)
|
||||||
|
|
||||||
def url_info(url, faker = False, headers = {}):
|
def url_info(url, faker = False, headers = {}):
|
||||||
if faker:
|
headers = fake_headers if faker else headers
|
||||||
response = request.urlopen(request.Request(url, headers = fake_headers), None)
|
if headers:
|
||||||
elif headers:
|
|
||||||
response = request.urlopen(request.Request(url, headers = headers), None)
|
response = request.urlopen(request.Request(url, headers = headers), None)
|
||||||
else:
|
else:
|
||||||
response = request.urlopen(request.Request(url))
|
response = request.urlopen(request.Request(url))
|
||||||
@ -359,7 +330,8 @@ def url_info(url, faker = False, headers = {}):
|
|||||||
headers = response.headers
|
headers = response.headers
|
||||||
|
|
||||||
type = headers['content-type']
|
type = headers['content-type']
|
||||||
if type == 'image/jpg; charset=UTF-8' or type == 'image/jpg' : type = 'audio/mpeg' #fix for netease
|
if type in ('image/jpg; charset=UTF-8', 'image/jpg'):
|
||||||
|
type = 'audio/mpeg' #fix for netease
|
||||||
mapping = {
|
mapping = {
|
||||||
'video/3gpp': '3gp',
|
'video/3gpp': '3gp',
|
||||||
'video/f4v': 'flv',
|
'video/f4v': 'flv',
|
||||||
@ -376,9 +348,8 @@ def url_info(url, faker = False, headers = {}):
|
|||||||
'image/gif': 'gif',
|
'image/gif': 'gif',
|
||||||
'application/pdf': 'pdf',
|
'application/pdf': 'pdf',
|
||||||
}
|
}
|
||||||
if type in mapping:
|
ext = mapping.get(type, None)
|
||||||
ext = mapping[type]
|
if not ext:
|
||||||
else:
|
|
||||||
type = None
|
type = None
|
||||||
if headers['content-disposition']:
|
if headers['content-disposition']:
|
||||||
try:
|
try:
|
||||||
@ -400,15 +371,13 @@ def url_info(url, faker = False, headers = {}):
|
|||||||
return type, ext, size
|
return type, ext, size
|
||||||
|
|
||||||
def url_locations(urls, faker = False, headers = {}):
|
def url_locations(urls, faker = False, headers = {}):
|
||||||
|
headers = fake_headers if faker else headers
|
||||||
locations = []
|
locations = []
|
||||||
for url in urls:
|
for url in urls:
|
||||||
if faker:
|
if headers:
|
||||||
response = request.urlopen(request.Request(url, headers = fake_headers), None)
|
|
||||||
elif headers:
|
|
||||||
response = request.urlopen(request.Request(url, headers = headers), None)
|
response = request.urlopen(request.Request(url, headers = headers), None)
|
||||||
else:
|
else:
|
||||||
response = request.urlopen(request.Request(url))
|
response = request.urlopen(request.Request(url))
|
||||||
|
|
||||||
locations.append(response.url)
|
locations.append(response.url)
|
||||||
return locations
|
return locations
|
||||||
|
|
||||||
@ -446,12 +415,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
|
|||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
|
|
||||||
if received < file_size:
|
if received < file_size:
|
||||||
if faker:
|
headers = fake_headers if faker else (headers or {})
|
||||||
headers = fake_headers
|
|
||||||
elif headers:
|
|
||||||
headers = headers
|
|
||||||
else:
|
|
||||||
headers = {}
|
|
||||||
if received:
|
if received:
|
||||||
headers['Range'] = 'bytes=' + str(received) + '-'
|
headers['Range'] = 'bytes=' + str(received) + '-'
|
||||||
if refer:
|
if refer:
|
||||||
@ -523,12 +487,7 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
|
|||||||
else:
|
else:
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
|
|
||||||
if faker:
|
headers = fake_headers if faker else (headers or {})
|
||||||
headers = fake_headers
|
|
||||||
elif headers:
|
|
||||||
headers = headers
|
|
||||||
else:
|
|
||||||
headers = {}
|
|
||||||
if received:
|
if received:
|
||||||
headers['Range'] = 'bytes=' + str(received) + '-'
|
headers['Range'] = 'bytes=' + str(received) + '-'
|
||||||
if refer:
|
if refer:
|
||||||
@ -576,8 +535,7 @@ class SimpleProgressBar:
|
|||||||
self.displayed = True
|
self.displayed = True
|
||||||
bar_size = self.bar_size
|
bar_size = self.bar_size
|
||||||
percent = round(self.received * 100 / self.total_size, 1)
|
percent = round(self.received * 100 / self.total_size, 1)
|
||||||
if percent >= 100:
|
percent = max(percent, 100)
|
||||||
percent = 100
|
|
||||||
dots = bar_size * int(percent) // 100
|
dots = bar_size * int(percent) // 100
|
||||||
plus = int(percent) - dots // bar_size * 100
|
plus = int(percent) - dots // bar_size * 100
|
||||||
if plus > 0.8:
|
if plus > 0.8:
|
||||||
@ -659,17 +617,11 @@ def get_output_filename(urls, title, ext, output_dir, merge):
|
|||||||
if (len(urls) > 1) and merge:
|
if (len(urls) > 1) and merge:
|
||||||
from .processor.ffmpeg import has_ffmpeg_installed
|
from .processor.ffmpeg import has_ffmpeg_installed
|
||||||
if ext in ['flv', 'f4v']:
|
if ext in ['flv', 'f4v']:
|
||||||
if has_ffmpeg_installed():
|
merged_ext = 'mp4' if has_ffmpeg_installed() else 'flv'
|
||||||
merged_ext = 'mp4'
|
|
||||||
else:
|
|
||||||
merged_ext = 'flv'
|
|
||||||
elif ext == 'mp4':
|
elif ext == 'mp4':
|
||||||
merged_ext = 'mp4'
|
merged_ext = 'mp4'
|
||||||
elif ext == 'ts':
|
elif ext == 'ts':
|
||||||
if has_ffmpeg_installed():
|
merged_ext = 'mkv' if has_ffmpeg_installed() else 'ts'
|
||||||
merged_ext = 'mkv'
|
|
||||||
else:
|
|
||||||
merged_ext = 'ts'
|
|
||||||
return '%s.%s' % (title, merged_ext)
|
return '%s.%s' % (title, merged_ext)
|
||||||
|
|
||||||
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
|
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
|
||||||
@ -699,8 +651,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
|||||||
|
|
||||||
if total_size:
|
if total_size:
|
||||||
if not force and os.path.exists(output_filepath) and os.path.getsize(output_filepath) >= total_size * 0.9:
|
if not force and os.path.exists(output_filepath) and os.path.getsize(output_filepath) >= total_size * 0.9:
|
||||||
print('Skipping %s: file already exists' % output_filepath)
|
print('Skipping %s: file already exists\n' % output_filepath)
|
||||||
print()
|
|
||||||
return
|
return
|
||||||
bar = SimpleProgressBar(total_size, len(urls))
|
bar = SimpleProgressBar(total_size, len(urls))
|
||||||
else:
|
else:
|
||||||
@ -729,7 +680,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
|||||||
print()
|
print()
|
||||||
return
|
return
|
||||||
|
|
||||||
if 'av' in kwargs and kwargs['av']:
|
if kwargs.get('av', None):
|
||||||
from .processor.ffmpeg import has_ffmpeg_installed
|
from .processor.ffmpeg import has_ffmpeg_installed
|
||||||
if has_ffmpeg_installed():
|
if has_ffmpeg_installed():
|
||||||
from .processor.ffmpeg import ffmpeg_concat_av
|
from .processor.ffmpeg import ffmpeg_concat_av
|
||||||
@ -738,7 +689,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
|||||||
if ret == 0:
|
if ret == 0:
|
||||||
for part in parts: os.remove(part)
|
for part in parts: os.remove(part)
|
||||||
|
|
||||||
elif ext in ['flv', 'f4v']:
|
elif ext in ('flv', 'f4v'):
|
||||||
try:
|
try:
|
||||||
from .processor.ffmpeg import has_ffmpeg_installed
|
from .processor.ffmpeg import has_ffmpeg_installed
|
||||||
if has_ffmpeg_installed():
|
if has_ffmpeg_installed():
|
||||||
@ -974,10 +925,7 @@ def mime_to_container(mime):
|
|||||||
'video/webm': 'webm',
|
'video/webm': 'webm',
|
||||||
'video/x-flv': 'flv',
|
'video/x-flv': 'flv',
|
||||||
}
|
}
|
||||||
if mime in mapping:
|
return mapping.get(mime, mime.split('/')[1])
|
||||||
return mapping[mime]
|
|
||||||
else:
|
|
||||||
return mime.split('/')[1]
|
|
||||||
|
|
||||||
def parse_host(host):
|
def parse_host(host):
|
||||||
"""Parses host name and port number from a string.
|
"""Parses host name and port number from a string.
|
||||||
@ -1226,8 +1174,7 @@ def google_search(url):
|
|||||||
durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
|
durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
|
||||||
print("Google Videos search:")
|
print("Google Videos search:")
|
||||||
for v in zip(videos, durs):
|
for v in zip(videos, durs):
|
||||||
print("- video: %s [%s]" % (unescape_html(v[0][1]),
|
print("- video: %s [%s]" % (unescape_html(v[0][1]), v[1] or '?'))
|
||||||
v[1] if v[1] else '?'))
|
|
||||||
print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE))
|
print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE))
|
||||||
print()
|
print()
|
||||||
print("Best matched result:")
|
print("Best matched result:")
|
||||||
|
Loading…
Reference in New Issue
Block a user