mirror of
https://github.com/soimort/you-get.git
synced 2025-01-24 05:55:02 +03:00
[universal] call embed_download only if content_type is text/html (#1369)
This commit is contained in:
parent
099cd3e1a4
commit
6fc2cc375e
@@ -6,6 +6,8 @@ from ..common import *
|
|||||||
from .embed import *
|
from .embed import *
|
||||||
|
|
||||||
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
|
content_type = get_head(url, headers=fake_headers)['Content-Type']
|
||||||
|
if content_type.startswith('text/html'):
|
||||||
try:
|
try:
|
||||||
embed_download(url, output_dir, merge=merge, info_only=info_only)
|
embed_download(url, output_dir, merge=merge, info_only=info_only)
|
||||||
except: pass
|
except: pass
|
||||||
@@ -15,11 +17,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
if len(domains) > 2: domains = domains[1:]
|
if len(domains) > 2: domains = domains[1:]
|
||||||
site_info = '.'.join(domains)
|
site_info = '.'.join(domains)
|
||||||
|
|
||||||
response = get_response(url, faker=True)
|
|
||||||
content_type = response.headers['Content-Type']
|
|
||||||
|
|
||||||
if content_type.startswith('text/html'):
|
if content_type.startswith('text/html'):
|
||||||
# extract an HTML page
|
# extract an HTML page
|
||||||
|
response = get_response(url, faker=True)
|
||||||
page = str(response.data)
|
page = str(response.data)
|
||||||
|
|
||||||
page_title = r1(r'<title>([^<]*)', page)
|
page_title = r1(r'<title>([^<]*)', page)
|
||||||
|
Loading…
Reference in New Issue
Block a user