mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 21:45:02 +03:00
[universal] call embed_download only if content_type is text/html (#1369)
This commit is contained in:
parent
099cd3e1a4
commit
6fc2cc375e
@@ -6,20 +6,20 @@ from ..common import *
|
||||
from .embed import *
|
||||
|
||||
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
try:
|
||||
embed_download(url, output_dir, merge=merge, info_only=info_only)
|
||||
except: pass
|
||||
else: return
|
||||
content_type = get_head(url, headers=fake_headers)['Content-Type']
|
||||
if content_type.startswith('text/html'):
|
||||
try:
|
||||
embed_download(url, output_dir, merge=merge, info_only=info_only)
|
||||
except: pass
|
||||
else: return
|
||||
|
||||
domains = url.split('/')[2].split('.')
|
||||
if len(domains) > 2: domains = domains[1:]
|
||||
site_info = '.'.join(domains)
|
||||
|
||||
response = get_response(url, faker=True)
|
||||
content_type = response.headers['Content-Type']
|
||||
|
||||
if content_type.startswith('text/html'):
|
||||
# extract an HTML page
|
||||
response = get_response(url, faker=True)
|
||||
page = str(response.data)
|
||||
|
||||
page_title = r1(r'<title>([^<]*)', page)
|
||||
|
Loading…
Reference in New Issue
Block a user