[common.py] keep user-specified headers when faker is True

Previously, in several functions, when the faker argument was True and
the headers argument was non-empty, the headers argument was silently
ignored. This commit fixes that behavior by merging the fake headers into
the user-specified headers (without overriding user-specified keys).

Also, get rid of mutable default argument values, which are an
anti-pattern and unsafe for changes like the one above: merging fake
headers in place would mutate the shared default dict across calls. As
part of this effort, wherever a default value of {} is replaced with
None, the corresponding argument is also copied before use so that the
caller's dict is never mutated; this may be a bit paranoid in some cases.
This commit is contained in:
Zhiming Wang 2017-05-25 15:30:06 -04:00
parent d0a7655727
commit 23011a011c
No known key found for this signature in database
GPG Key ID: 5B58F95EC95965D8

View File

@ -308,7 +308,7 @@ def urlopen_with_retry(*args, **kwargs):
except socket.timeout:
logging.debug('request attempt %s timeout' % str(i + 1))
def get_content(url, headers={}, decoded=True):
def get_content(url, headers=None, decoded=True):
"""Gets the content of a URL via sending a HTTP GET request.
Args:
@ -322,6 +322,7 @@ def get_content(url, headers={}, decoded=True):
logging.debug('get_content: %s' % url)
headers = {} if headers is None else headers.copy()
req = request.Request(url, headers=headers)
if cookies:
cookies.add_cookie_header(req)
@ -347,7 +348,7 @@ def get_content(url, headers={}, decoded=True):
return data
def post_content(url, headers={}, post_data={}, decoded=True):
def post_content(url, headers=None, post_data=None, decoded=True):
"""Post the content of a URL via sending a HTTP POST request.
Args:
@ -361,6 +362,8 @@ def post_content(url, headers={}, post_data={}, decoded=True):
logging.debug('post_content: %s \n post_data: %s' % (url, post_data))
headers = {} if headers is None else headers.copy()
post_data = {} if post_data is None else post_data.copy()
req = request.Request(url, headers=headers)
if cookies:
cookies.add_cookie_header(req)
@ -386,40 +389,34 @@ def post_content(url, headers={}, post_data={}, decoded=True):
return data
def url_size(url, faker = False, headers = {}):
def url_size(url, faker = False, headers = None):
headers = {} if headers is None else headers.copy()
if faker:
response = urlopen_with_retry(request.Request(url, headers=fake_headers))
elif headers:
headers.update({key: val for key, val in fake_headers.items() if key not in headers})
response = urlopen_with_retry(request.Request(url, headers=headers))
else:
response = urlopen_with_retry(url)
size = response.headers['content-length']
return int(size) if size!=None else float('inf')
def urls_size(urls, faker = False, headers = {}):
def urls_size(urls, faker = False, headers = None):
return sum([url_size(url, faker=faker, headers=headers) for url in urls])
def get_head(url, headers = {}, get_method = 'HEAD'):
def get_head(url, headers = None, get_method = 'HEAD'):
logging.debug('get_head: %s' % url)
if headers:
headers = {} if headers is None else headers.copy()
req = request.Request(url, headers=headers)
else:
req = request.Request(url)
req.get_method = lambda: get_method
res = urlopen_with_retry(req)
return dict(res.headers)
def url_info(url, faker = False, headers = {}):
def url_info(url, faker = False, headers = None):
logging.debug('url_info: %s' % url)
headers = {} if headers is None else headers.copy()
if faker:
response = urlopen_with_retry(request.Request(url, headers=fake_headers))
elif headers:
headers.update({key: val for key, val in fake_headers.items() if key not in headers})
response = urlopen_with_retry(request.Request(url, headers=headers))
else:
response = urlopen_with_retry(request.Request(url))
headers = response.headers
@ -464,23 +461,19 @@ def url_info(url, faker = False, headers = {}):
return type, ext, size
def url_locations(urls, faker = False, headers = {}):
def url_locations(urls, faker = False, headers = None):
headers = {} if headers is None else headers.copy()
if faker:
headers.update({key: val for key, val in fake_headers.items() if key not in headers})
locations = []
for url in urls:
logging.debug('url_locations: %s' % url)
if faker:
response = urlopen_with_retry(request.Request(url, headers=fake_headers))
elif headers:
response = urlopen_with_retry(request.Request(url, headers=headers))
else:
response = urlopen_with_retry(request.Request(url))
locations.append(response.url)
return locations
def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}, timeout = None, **kwargs):
#When a referer specified with param refer, the key must be 'Referer' for the hack here
def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = None, timeout = None, **kwargs):
#When a referer specified with param refer, the key must be 'Referer' for the hack here
if refer is not None:
headers['Referer'] = refer
file_size = url_size(url, faker = faker, headers = headers)
@ -516,12 +509,9 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
open_mode = 'wb'
if received < file_size:
headers = {} if headers is None else headers.copy()
if faker:
headers = fake_headers
elif headers:
headers = headers
else:
headers = {}
headers.update({key: val for key, val in fake_headers.items() if key not in headers})
headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
headers['Referer'] = refer
@ -564,7 +554,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
os.remove(filepath) # on Windows rename could fail if destination filepath exists
os.rename(temp_filepath, filepath)
def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore_range=False, refer=None, is_part=False, faker=False, headers={}):
def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore_range=False, refer=None, is_part=False, faker=False, headers=None):
def dyn_update_url(received):
if callable(dyn_callback):
logging.debug('Calling callback %s for new URL from %s' % (dyn_callback.__name__, received))
@ -599,12 +589,9 @@ def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore
else:
open_mode = 'wb'
headers = {} if headers is None else headers.copy()
if faker:
headers = fake_headers
elif headers:
headers = headers
else:
headers = {}
headers.update({key: val for key, val in fake_headers.items() if key not in headers})
if received:
url = dyn_update_url(received)
if not ignore_range:
@ -758,7 +745,7 @@ def get_output_filename(urls, title, ext, output_dir, merge):
merged_ext = 'ts'
return '%s.%s' % (title, merged_ext)
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = None, **kwargs):
assert urls
if json_output:
json_output_.download_urls(urls=urls, title=title, ext=ext, total_size=total_size, refer=refer)
@ -877,7 +864,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
print()
def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = None, **kwargs):
assert urls
if dry_run:
print('Real URLs:\n%s\n' % urls)
@ -956,8 +943,9 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
print()
def download_rtmp_url(url,title, ext,params={}, total_size=0, output_dir='.', refer=None, merge=True, faker=False):
def download_rtmp_url(url,title, ext,params=None, total_size=0, output_dir='.', refer=None, merge=True, faker=False):
assert url
params = {} if params is None else params.copy()
if dry_run:
print('Real URL:\n%s\n' % [url])
if params.get("-y",False): #None or unset ->False
@ -973,8 +961,9 @@ def download_rtmp_url(url,title, ext,params={}, total_size=0, output_dir='.', re
assert has_rtmpdump_installed(), "RTMPDump not installed."
download_rtmpdump_stream(url, title, ext,params, output_dir)
def download_url_ffmpeg(url,title, ext,params={}, total_size=0, output_dir='.', refer=None, merge=True, faker=False):
def download_url_ffmpeg(url,title, ext,params=None, total_size=0, output_dir='.', refer=None, merge=True, faker=False):
assert url
params = {} if params is None else params.copy()
if dry_run:
print('Real URL:\n%s\n' % [url])
if params.get("-y",False): #None or unset ->False