Support loading cookies.txt

This commit is contained in:
HU Pili 2014-03-28 12:49:34 +08:00
parent ecb7e84e6b
commit 95cd795241

View File

@ -54,16 +54,16 @@ def r1_of(patterns, text):
def match1(text, *patterns):
"""Scans through a string for substrings matched some patterns (first-subgroups only).
Args:
text: A string to be scanned.
patterns: Arbitrary number of regex patterns.
Returns:
When only one pattern is given, returns a string (None if no match found).
When more than one pattern are given, returns a list of strings ([] if no match found).
"""
if len(patterns) == 1:
pattern = patterns[0]
match = re.search(pattern, text)
@ -86,15 +86,15 @@ def launch_player(player, urls):
def parse_query_param(url, param):
"""Parses the query string of a URL and returns the value of a parameter.
Args:
url: A URL.
param: A string representing the name of the parameter.
Returns:
The value of the parameter.
"""
try:
return parse.parse_qs(parse.urlparse(url).query)[param][0]
except:
@ -172,7 +172,7 @@ def get_response(url, faker = False):
response = request.urlopen(request.Request(url, headers = fake_headers), None)
else:
response = request.urlopen(url)
data = response.read()
if response.info().get('Content-Encoding') == 'gzip':
data = ungzip(data)
@ -198,26 +198,30 @@ def get_decoded_html(url, faker = False):
def get_content(url, headers={}, decoded=True):
"""Gets the content of a URL via sending a HTTP GET request.
Args:
url: A URL.
headers: Request headers used by the client.
decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.
Returns:
The content as a string.
"""
response = request.urlopen(request.Request(url, headers=headers))
req = request.Request(url, headers=headers)
if cookies_txt:
cookies_txt.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs)
response = request.urlopen(req)
data = response.read()
# Handle HTTP compression for gzip and deflate (zlib)
content_encoding = response.getheader('Content-Encoding')
if content_encoding == 'gzip':
data = ungzip(data)
elif content_encoding == 'deflate':
data = undeflate(data)
# Decode the response body
if decoded:
charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
@ -225,7 +229,7 @@ def get_content(url, headers={}, decoded=True):
data = data.decode(charset)
else:
data = data.decode('utf-8')
return data
def url_size(url, faker = False):
@ -233,7 +237,7 @@ def url_size(url, faker = False):
response = request.urlopen(request.Request(url, headers = fake_headers), None)
else:
response = request.urlopen(url)
size = int(response.headers['content-length'])
return size
@ -245,9 +249,9 @@ def url_info(url, faker = False):
response = request.urlopen(request.Request(url, headers = fake_headers), None)
else:
response = request.urlopen(request.Request(url))
headers = response.headers
type = headers['content-type']
mapping = {
'video/3gpp': '3gp',
@ -275,12 +279,12 @@ def url_info(url, faker = False):
ext = None
else:
ext = None
if headers['transfer-encoding'] != 'chunked':
size = int(headers['content-length'])
else:
size = None
return type, ext, size
def url_locations(urls, faker = False):
@ -290,13 +294,13 @@ def url_locations(urls, faker = False):
response = request.urlopen(request.Request(url, headers = fake_headers), None)
else:
response = request.urlopen(request.Request(url))
locations.append(response.url)
return locations
def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
file_size = url_size(url, faker = faker)
if os.path.exists(filepath):
if not force and file_size == os.path.getsize(filepath):
if not is_part:
@ -314,19 +318,19 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
elif not os.path.exists(os.path.dirname(filepath)):
os.mkdir(os.path.dirname(filepath))
temp_filepath = filepath + '.download'
received = 0
if not force:
open_mode = 'ab'
if os.path.exists(temp_filepath):
received += os.path.getsize(temp_filepath)
if bar:
bar.update_received(os.path.getsize(temp_filepath))
else:
open_mode = 'wb'
if received < file_size:
if faker:
headers = fake_headers
@ -336,7 +340,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
headers['Referer'] = refer
response = request.urlopen(request.Request(url, headers = headers), None)
try:
range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
@ -344,13 +348,13 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
range_length = end_length - range_start
except:
range_length = int(response.headers['content-length'])
if file_size != received + range_length:
received = 0
if bar:
bar.received = 0
open_mode = 'wb'
with open(temp_filepath, open_mode) as output:
while True:
buffer = response.read(1024 * 256)
@ -364,9 +368,9 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
received += len(buffer)
if bar:
bar.update_received(len(buffer))
assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)
if os.access(filepath, os.W_OK):
os.remove(filepath) # on Windows rename could fail if destination filepath exists
os.rename(temp_filepath, filepath)
@ -389,19 +393,19 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
elif not os.path.exists(os.path.dirname(filepath)):
os.mkdir(os.path.dirname(filepath))
temp_filepath = filepath + '.download'
received = 0
if not force:
open_mode = 'ab'
if os.path.exists(temp_filepath):
received += os.path.getsize(temp_filepath)
if bar:
bar.update_received(os.path.getsize(temp_filepath))
else:
open_mode = 'wb'
if faker:
headers = fake_headers
else:
@ -410,9 +414,9 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
headers['Referer'] = refer
response = request.urlopen(request.Request(url, headers = headers), None)
with open(temp_filepath, open_mode) as output:
while True:
buffer = response.read(1024 * 256)
@ -422,9 +426,9 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
received += len(buffer)
if bar:
bar.update_received(len(buffer))
# Sanity check: the byte counter must agree with what actually landed on disk.
# The message has three '%s' placeholders, so three values are required; with
# only two, a failing check raised TypeError from the formatting instead of the
# intended AssertionError. The third value is the temp file path.
assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)
if os.access(filepath, os.W_OK):
os.remove(filepath) # on Windows rename could fail if destination filepath exists
os.rename(temp_filepath, filepath)
@ -436,7 +440,7 @@ class SimpleProgressBar:
self.total_pieces = total_pieces
self.current_piece = 1
self.received = 0
def update(self):
self.displayed = True
bar_size = 40
@ -455,14 +459,14 @@ class SimpleProgressBar:
bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces)
sys.stdout.write('\r' + bar)
sys.stdout.flush()
def update_received(self, n):
    """Add n to the running received counter, then redraw the bar."""
    self.received = self.received + n
    self.update()
# Record n as the current piece number; update() prints it next to total_pieces.
def update_piece(self, n):
self.current_piece = n
def done(self):
if self.displayed:
print()
@ -475,20 +479,20 @@ class PiecesProgressBar:
self.total_pieces = total_pieces
self.current_piece = 1
self.received = 0
def update(self):
    """Redraw the progress line in place on stdout.

    The percentage and the 40-column bar body are always rendered as '?'
    placeholders; only the piece counter (current_piece/total_pieces)
    changes between redraws. Also sets the `displayed` flag that done()
    checks.
    """
    self.displayed = True
    placeholder = '?'
    line = '{0:>5}%[{1:<40}] {2}/{3}'.format(placeholder, placeholder * 40, self.current_piece, self.total_pieces)
    out = sys.stdout
    # The leading carriage return moves the cursor back to column 0 so the
    # line overwrites the previous one.
    out.write('\r' + line)
    out.flush()
def update_received(self, n):
    """Add n to the running received counter, then redraw the line."""
    self.received = self.received + n
    self.update()
# Record n as the current piece number; update() prints it next to total_pieces.
def update_piece(self, n):
self.current_piece = n
def done(self):
if self.displayed:
print()
@ -509,7 +513,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
if dry_run:
print('Real URLs:\n', urls, '\n')
return
if player:
launch_player(player, urls)
return
@ -522,9 +526,9 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
import sys
traceback.print_exc(file = sys.stdout)
pass
title = legitimize(title)
filename = '%s.%s' % (title, ext)
filepath = os.path.join(output_dir, filename)
if total_size:
@ -535,7 +539,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
bar = SimpleProgressBar(total_size, len(urls))
else:
bar = PiecesProgressBar(total_size, len(urls))
if len(urls) == 1:
url = urls[0]
print('Downloading %s ...' % tr(filename))
@ -552,7 +556,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
bar.update_piece(i + 1)
url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker)
bar.done()
if not merge:
print()
return
@ -570,7 +574,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
else:
for part in parts:
os.remove(part)
elif ext == 'mp4':
try:
from .processor.ffmpeg import has_ffmpeg_installed
@ -585,10 +589,10 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
else:
for part in parts:
os.remove(part)
else:
print("Can't merge %s files" % ext)
print()
def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
@ -596,15 +600,15 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
if dry_run:
print('Real URLs:\n', urls, '\n')
return
if player:
launch_player(player, urls)
return
# Only MPEG-TS output is handled here. ('ts') without a trailing comma is just
# the string 'ts', which made `in` a substring test that also let '', 't' and
# 's' through; a one-element tuple gives the intended exact match.
assert ext in ('ts',)
title = legitimize(title)
filename = '%s.%s' % (title, 'ts')
filepath = os.path.join(output_dir, filename)
if total_size:
@ -615,7 +619,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
bar = SimpleProgressBar(total_size, len(urls))
else:
bar = PiecesProgressBar(total_size, len(urls))
if len(urls) == 1:
parts = []
url = urls[0]
@ -624,7 +628,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
parts.append(filepath)
url_save_chunked(url, filepath, bar, refer = refer, faker = faker)
bar.done()
if not merge:
print()
return
@ -652,7 +656,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
bar.update_piece(i + 1)
url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker)
bar.done()
if not merge:
print()
return
@ -669,7 +673,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
print('No ffmpeg is found. Merging aborted.')
else:
print("Can't merge %s files" % ext)
print()
def playlist_not_supported(name):
@ -698,7 +702,7 @@ def print_info(site_info, title, type, size):
type = 'video/MP2T'
elif type in ['webm']:
type = 'video/webm'
if type in ['video/3gpp']:
type_info = "3GPP multimedia file (%s)" % type
elif type in ['video/x-flv', 'video/f4v']:
@ -725,7 +729,7 @@ def print_info(site_info, title, type, size):
type_info = "MP3 (%s)" % type
else:
type_info = "Unknown type (%s)" % type
print("Video Site:", site_info)
print("Title: ", tr(title))
print("Type: ", type_info)
@ -777,7 +781,7 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge
url = url[8:]
if not url.startswith('http://'):
url = 'http://' + url
if playlist:
download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only)
else:
@ -804,6 +808,7 @@ def script_main(script_name, download, download_playlist = None):
-f | --force Force overwriting existed files.
-i | --info Display the information of videos without downloading.
-u | --url Display the real URLs of videos without downloading.
-c | --cookies Load NetScape's cookies.txt file.
-n | --no-merge Don't merge video parts.
-o | --output-dir <PATH> Set the output directory for downloaded videos.
-p | --player <PLAYER [options]> Directly play the video with PLAYER like vlc/smplayer.
@ -813,26 +818,28 @@ def script_main(script_name, download, download_playlist = None):
--sogou-proxy <HOST:PORT> Run a standalone Sogou proxy server.
--debug Show traceback on KeyboardInterrupt.
'''
short_opts = 'VhfiunSo:p:x:'
opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env=']
# getopt short options: a trailing ':' marks an option that takes a value
# (-c, -o, -p, -x).
short_opts = 'Vhfiuc:nSo:p:x:'
# getopt long options: a trailing '=' marks an option that takes a value.
# '--cookies' must carry it so the long form receives the cookies.txt path
# just like '-c'; without it the path was parsed as a positional argument
# and the cookie jar was given an empty filename.
opts = ['version', 'help', 'force', 'info', 'url', 'cookies=', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env=']
if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
try:
opts, args = getopt.getopt(sys.argv[1:], short_opts, opts)
except getopt.GetoptError as err:
log.e(err)
log.e("try 'you-get --help' for more options")
sys.exit(2)
global force
global dry_run
global player
global sogou_proxy
global sogou_env
global cookies_txt
cookies_txt = None
info_only = False
playlist = False
merge = True
@ -853,6 +860,10 @@ def script_main(script_name, download, download_playlist = None):
info_only = True
elif o in ('-u', '--url'):
dry_run = True
elif o in ('-c', '--cookies'):
from http import cookiejar
cookies_txt = cookiejar.MozillaCookieJar(a)
cookies_txt.load()
elif o in ('-l', '--playlist'):
playlist = True
elif o in ('-n', '--no-merge'):
@ -892,7 +903,7 @@ def script_main(script_name, download, download_playlist = None):
else:
print(help)
sys.exit()
set_http_proxy(proxy)
try: