mirror of
https://github.com/soimort/you-get.git
synced 2025-01-24 22:15:03 +03:00
[universal] workaround for websites that block HEAD requests
This commit is contained in:
parent
03266c030a
commit
538f1796f2
@@ -338,7 +338,7 @@ def get_content(url, headers={}, decoded=True):
|
||||
if charset is not None:
|
||||
data = data.decode(charset)
|
||||
else:
|
||||
data = data.decode('utf-8')
|
||||
data = data.decode('utf-8', 'ignore')
|
||||
|
||||
return data
|
||||
|
||||
@@ -395,12 +395,12 @@ def url_size(url, faker = False, headers = {}):
|
||||
def urls_size(urls, faker = False, headers = {}):
|
||||
return sum([url_size(url, faker=faker, headers=headers) for url in urls])
|
||||
|
||||
def get_head(url, headers = {}):
|
||||
def get_head(url, headers = {}, get_method = 'HEAD'):
|
||||
if headers:
|
||||
req = request.Request(url, headers = headers)
|
||||
else:
|
||||
req = request.Request(url)
|
||||
req.get_method = lambda : 'HEAD'
|
||||
req.get_method = lambda : get_method
|
||||
res = request.urlopen(req)
|
||||
return dict(res.headers)
|
||||
|
||||
|
@@ -6,7 +6,10 @@ from ..common import *
|
||||
from .embed import *
|
||||
|
||||
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
content_type = get_head(url, headers=fake_headers)['Content-Type']
|
||||
try:
|
||||
content_type = get_head(url, headers=fake_headers)['Content-Type']
|
||||
except:
|
||||
content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type']
|
||||
if content_type.startswith('text/html'):
|
||||
try:
|
||||
embed_download(url, output_dir, merge=merge, info_only=info_only)
|
||||
|
Loading…
Reference in New Issue
Block a user