diff --git a/src/you_get/common.py b/src/you_get/common.py
index 0100cae7..27998cf5 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -338,7 +338,7 @@ def get_content(url, headers={}, decoded=True):
         if charset is not None:
             data = data.decode(charset)
         else:
-            data = data.decode('utf-8')
+            data = data.decode('utf-8', 'ignore')
 
     return data
 
@@ -395,12 +395,12 @@ def url_size(url, faker = False, headers = {}):
 def urls_size(urls, faker = False, headers = {}):
     return sum([url_size(url, faker=faker, headers=headers) for url in urls])
 
-def get_head(url, headers = {}):
+def get_head(url, headers = {}, get_method = 'HEAD'):
     if headers:
         req = request.Request(url, headers = headers)
     else:
         req = request.Request(url)
-    req.get_method = lambda : 'HEAD'
+    req.get_method = lambda : get_method
     res = request.urlopen(req)
     return dict(res.headers)
 
diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py
index ebab70f8..a4262f61 100644
--- a/src/you_get/extractors/universal.py
+++ b/src/you_get/extractors/universal.py
@@ -6,7 +6,10 @@ from ..common import *
 from .embed import *
 
 def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    content_type = get_head(url, headers=fake_headers)['Content-Type']
+    try:
+        content_type = get_head(url, headers=fake_headers)['Content-Type']
+    except Exception:  # HEAD request failed or gave no Content-Type; retry with GET (Ctrl-C still propagates)
+        content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type']
     if content_type.startswith('text/html'):
         try:
             embed_download(url, output_dir, merge=merge, info_only=info_only)