[universal] call embed_download only if content_type is text/html (#1369)

This commit is contained in:
Mort Yao 2016-08-26 19:27:40 +02:00
parent 099cd3e1a4
commit 6fc2cc375e
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251

View File

@@ -6,20 +6,20 @@ from ..common import *
from .embed import * from .embed import *
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
try: content_type = get_head(url, headers=fake_headers)['Content-Type']
embed_download(url, output_dir, merge=merge, info_only=info_only) if content_type.startswith('text/html'):
except: pass try:
else: return embed_download(url, output_dir, merge=merge, info_only=info_only)
except: pass
else: return
domains = url.split('/')[2].split('.') domains = url.split('/')[2].split('.')
if len(domains) > 2: domains = domains[1:] if len(domains) > 2: domains = domains[1:]
site_info = '.'.join(domains) site_info = '.'.join(domains)
response = get_response(url, faker=True)
content_type = response.headers['Content-Type']
if content_type.startswith('text/html'): if content_type.startswith('text/html'):
# extract an HTML page # extract an HTML page
response = get_response(url, faker=True)
page = str(response.data) page = str(response.data)
page_title = r1(r'<title>([^<]*)', page) page_title = r1(r'<title>([^<]*)', page)