[universal] call embed_download only if content_type is text/html (#1369)

This commit is contained in:
Mort Yao 2016-08-26 19:27:40 +02:00
parent 099cd3e1a4
commit 6fc2cc375e
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251

View File

@@ -6,20 +6,20 @@ from ..common import *
from .embed import *
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
try:
embed_download(url, output_dir, merge=merge, info_only=info_only)
except: pass
else: return
content_type = get_head(url, headers=fake_headers)['Content-Type']
if content_type.startswith('text/html'):
try:
embed_download(url, output_dir, merge=merge, info_only=info_only)
except: pass
else: return
domains = url.split('/')[2].split('.')
if len(domains) > 2: domains = domains[1:]
site_info = '.'.join(domains)
response = get_response(url, faker=True)
content_type = response.headers['Content-Type']
if content_type.startswith('text/html'):
# extract an HTML page
response = get_response(url, faker=True)
page = str(response.data)
page_title = r1(r'<title>([^<]*)', page)