mirror of https://github.com/soimort/you-get.git, synced 2025-01-23 21:45:02 +03:00
Support load cookies.txt
This commit is contained in:
parent
ecb7e84e6b
commit
95cd795241
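
What this commit does: it adds a -c/--cookies command-line option that loads a Netscape-format cookies.txt into an http.cookiejar.MozillaCookieJar and attaches the matching cookies to the HTTP requests issued by get_content(). A minimal sketch of that mechanism (not part of the diff below; the file path and URL are hypothetical):

from http import cookiejar
from urllib import request

jar = cookiejar.MozillaCookieJar('cookies.txt')  # hypothetical path to a Netscape-format file
jar.load()                                       # parse the cookies.txt file

req = request.Request('http://example.com/')     # hypothetical URL
jar.add_cookie_header(req)                       # attach cookies that match the URL
response = request.urlopen(req)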
@@ -54,16 +54,16 @@ def r1_of(patterns, text):

def match1(text, *patterns):
    """Scans through a string for substrings matched by some patterns (first-subgroups only).

    Args:
        text: A string to be scanned.
        patterns: Arbitrary number of regex patterns.

    Returns:
        When only one pattern is given, returns a string (None if no match found).
        When more than one pattern is given, returns a list of strings ([] if no match found).
    """
    if len(patterns) == 1:
        pattern = patterns[0]
        match = re.search(pattern, text)
@@ -86,15 +86,15 @@ def launch_player(player, urls):

def parse_query_param(url, param):
    """Parses the query string of a URL and returns the value of a parameter.

    Args:
        url: A URL.
        param: A string representing the name of the parameter.

    Returns:
        The value of the parameter.
    """
    try:
        return parse.parse_qs(parse.urlparse(url).query)[param][0]
    except:
@@ -172,7 +172,7 @@ def get_response(url, faker = False):
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
    else:
        response = request.urlopen(url)

    data = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
@@ -198,26 +198,30 @@ def get_decoded_html(url, faker = False):

def get_content(url, headers={}, decoded=True):
    """Gets the content of a URL via sending an HTTP GET request.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        decoded: Whether to decode the response body using UTF-8 or the charset specified in Content-Type.

    Returns:
        The content as a string.
    """

-    response = request.urlopen(request.Request(url, headers=headers))
+    req = request.Request(url, headers=headers)
+    if cookies_txt:
+        cookies_txt.add_cookie_header(req)
+        req.headers.update(req.unredirected_hdrs)
+    response = request.urlopen(req)
    data = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    # Decode the response body
    if decoded:
        charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
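
A note on the req.headers.update(req.unredirected_hdrs) line added above: CookieJar.add_cookie_header() stores the Cookie header via Request.add_unredirected_header(), so it lands in req.unredirected_hdrs rather than req.headers; the patch presumably copies it over so code that inspects req.headers also sees it. A small sketch (hypothetical path and URL):

from http import cookiejar
from urllib import request

jar = cookiejar.MozillaCookieJar('cookies.txt')  # hypothetical path
jar.load()
req = request.Request('http://example.com/')     # hypothetical URL
jar.add_cookie_header(req)
print(req.headers)            # {} -- the Cookie header is not stored here
print(req.unredirected_hdrs)  # {'Cookie': '...'} when the jar has cookies matching the URL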
@@ -225,7 +229,7 @@ def get_content(url, headers={}, decoded=True):
            data = data.decode(charset)
        else:
            data = data.decode('utf-8')

    return data

def url_size(url, faker = False):
@@ -233,7 +237,7 @@ def url_size(url, faker = False):
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
    else:
        response = request.urlopen(url)

    size = int(response.headers['content-length'])
    return size
@@ -245,9 +249,9 @@ def url_info(url, faker = False):
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
    else:
        response = request.urlopen(request.Request(url))

    headers = response.headers

    type = headers['content-type']
    mapping = {
        'video/3gpp': '3gp',
@@ -275,12 +279,12 @@ def url_info(url, faker = False):
            ext = None
    else:
        ext = None

    if headers['transfer-encoding'] != 'chunked':
        size = int(headers['content-length'])
    else:
        size = None

    return type, ext, size

def url_locations(urls, faker = False):
@@ -290,13 +294,13 @@ def url_locations(urls, faker = False):
            response = request.urlopen(request.Request(url, headers = fake_headers), None)
        else:
            response = request.urlopen(request.Request(url))

        locations.append(response.url)
    return locations

def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
    file_size = url_size(url, faker = faker)

    if os.path.exists(filepath):
        if not force and file_size == os.path.getsize(filepath):
            if not is_part:
@@ -314,19 +318,19 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
    elif not os.path.exists(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))

    temp_filepath = filepath + '.download'
    received = 0
    if not force:
        open_mode = 'ab'

        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'

    if received < file_size:
        if faker:
            headers = fake_headers
@@ -336,7 +340,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
        headers['Range'] = 'bytes=' + str(received) + '-'
        if refer:
            headers['Referer'] = refer

        response = request.urlopen(request.Request(url, headers = headers), None)
        try:
            range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
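
The resume logic above asks the server to continue from the bytes already on disk (the Range header) and then reads Content-Range to find where the transfer actually restarted. A sketch of that parsing on a sample header value rather than a live response:

# hypothetical server reply to 'Range: bytes=1024-'
content_range = 'bytes 1024-999999/1000000'
range_start = int(content_range[6:].split('/')[0].split('-')[0])  # -> 1024
total_size = int(content_range.split('/')[1])                     # -> 1000000

As the next hunk shows, when the server sends no Content-Range the code falls back to Content-Length, and if the sizes do not add up it resets received to 0 and rewrites the file from scratch.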
@@ -344,13 +348,13 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
            range_length = end_length - range_start
        except:
            range_length = int(response.headers['content-length'])

        if file_size != received + range_length:
            received = 0
            if bar:
                bar.received = 0
            open_mode = 'wb'

        with open(temp_filepath, open_mode) as output:
            while True:
                buffer = response.read(1024 * 256)
@@ -364,9 +368,9 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
                received += len(buffer)
                if bar:
                    bar.update_received(len(buffer))

    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)

    if os.access(filepath, os.W_OK):
        os.remove(filepath) # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)
@@ -389,19 +393,19 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
    elif not os.path.exists(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))

    temp_filepath = filepath + '.download'
    received = 0
    if not force:
        open_mode = 'ab'

        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'

    if faker:
        headers = fake_headers
    else:
@@ -410,9 +414,9 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
        headers['Range'] = 'bytes=' + str(received) + '-'
    if refer:
        headers['Referer'] = refer

    response = request.urlopen(request.Request(url, headers = headers), None)

    with open(temp_filepath, open_mode) as output:
        while True:
            buffer = response.read(1024 * 256)
@@ -422,9 +426,9 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
            received += len(buffer)
            if bar:
                bar.update_received(len(buffer))

    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)

    if os.access(filepath, os.W_OK):
        os.remove(filepath) # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)
@@ -436,7 +440,7 @@ class SimpleProgressBar:
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0

    def update(self):
        self.displayed = True
        bar_size = 40
@@ -455,14 +459,14 @@ class SimpleProgressBar:
        bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces)
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    def update_received(self, n):
        self.received += n
        self.update()

    def update_piece(self, n):
        self.current_piece = n

    def done(self):
        if self.displayed:
            print()
@@ -475,20 +479,20 @@ class PiecesProgressBar:
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0

    def update(self):
        self.displayed = True
        bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('?', '?' * 40, self.current_piece, self.total_pieces)
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    def update_received(self, n):
        self.received += n
        self.update()

    def update_piece(self, n):
        self.current_piece = n

    def done(self):
        if self.displayed:
            print()
@@ -509,7 +513,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
    if dry_run:
        print('Real URLs:\n', urls, '\n')
        return

    if player:
        launch_player(player, urls)
        return
@@ -522,9 +526,9 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
            import sys
            traceback.print_exc(file = sys.stdout)
            pass

    title = legitimize(title)

    filename = '%s.%s' % (title, ext)
    filepath = os.path.join(output_dir, filename)
    if total_size:
@@ -535,7 +539,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
        bar = SimpleProgressBar(total_size, len(urls))
    else:
        bar = PiecesProgressBar(total_size, len(urls))

    if len(urls) == 1:
        url = urls[0]
        print('Downloading %s ...' % tr(filename))
@@ -552,7 +556,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
            bar.update_piece(i + 1)
            url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker)
        bar.done()

    if not merge:
        print()
        return
@@ -570,7 +574,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
        else:
            for part in parts:
                os.remove(part)

    elif ext == 'mp4':
        try:
            from .processor.ffmpeg import has_ffmpeg_installed
@@ -585,10 +589,10 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
        else:
            for part in parts:
                os.remove(part)

    else:
        print("Can't merge %s files" % ext)

    print()

def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
@@ -596,15 +600,15 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
    if dry_run:
        print('Real URLs:\n', urls, '\n')
        return

    if player:
        launch_player(player, urls)
        return

    assert ext in ('ts', )

    title = legitimize(title)

    filename = '%s.%s' % (title, 'ts')
    filepath = os.path.join(output_dir, filename)
    if total_size:
@@ -615,7 +619,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
        bar = SimpleProgressBar(total_size, len(urls))
    else:
        bar = PiecesProgressBar(total_size, len(urls))

    if len(urls) == 1:
        parts = []
        url = urls[0]
@@ -624,7 +628,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
        parts.append(filepath)
        url_save_chunked(url, filepath, bar, refer = refer, faker = faker)
        bar.done()

        if not merge:
            print()
            return
@@ -652,7 +656,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
            bar.update_piece(i + 1)
            url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker)
        bar.done()

        if not merge:
            print()
            return
@@ -669,7 +673,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
            print('No ffmpeg is found. Merging aborted.')
    else:
        print("Can't merge %s files" % ext)

    print()

def playlist_not_supported(name):
@@ -698,7 +702,7 @@ def print_info(site_info, title, type, size):
        type = 'video/MP2T'
    elif type in ['webm']:
        type = 'video/webm'

    if type in ['video/3gpp']:
        type_info = "3GPP multimedia file (%s)" % type
    elif type in ['video/x-flv', 'video/f4v']:
@@ -725,7 +729,7 @@ def print_info(site_info, title, type, size):
        type_info = "MP3 (%s)" % type
    else:
        type_info = "Unknown type (%s)" % type

    print("Video Site:", site_info)
    print("Title:     ", tr(title))
    print("Type:      ", type_info)
@@ -777,7 +781,7 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge
            url = url[8:]
        if not url.startswith('http://'):
            url = 'http://' + url

        if playlist:
            download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only)
        else:
@@ -804,6 +808,7 @@ def script_main(script_name, download, download_playlist = None):
    -f | --force                          Force overwriting existing files.
    -i | --info                           Display the information of videos without downloading.
    -u | --url                            Display the real URLs of videos without downloading.
+   -c | --cookies                        Load Netscape's cookies.txt file.
    -n | --no-merge                       Don't merge video parts.
    -o | --output-dir <PATH>              Set the output directory for downloaded videos.
    -p | --player <PLAYER [options]>      Directly play the video with PLAYER like vlc/smplayer.
@@ -813,26 +818,28 @@ def script_main(script_name, download, download_playlist = None):
         --sogou-proxy <HOST:PORT>        Run a standalone Sogou proxy server.
         --debug                          Show traceback on KeyboardInterrupt.
    '''

-    short_opts = 'VhfiunSo:p:x:'
-    opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env=']
+    short_opts = 'Vhfiuc:nSo:p:x:'
+    opts = ['version', 'help', 'force', 'info', 'url', 'cookies=', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'player=', 'http-proxy=', 'sogou-proxy=', 'sogou-env=']
    if download_playlist:
        short_opts = 'l' + short_opts
        opts = ['playlist'] + opts

    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, opts)
    except getopt.GetoptError as err:
        log.e(err)
        log.e("try 'you-get --help' for more options")
        sys.exit(2)

    global force
    global dry_run
    global player
    global sogou_proxy
    global sogou_env
+    global cookies_txt
+    cookies_txt = None

    info_only = False
    playlist = False
    merge = True
@@ -853,6 +860,10 @@ def script_main(script_name, download, download_playlist = None):
            info_only = True
        elif o in ('-u', '--url'):
            dry_run = True
+        elif o in ('-c', '--cookies'):
+            from http import cookiejar
+            cookies_txt = cookiejar.MozillaCookieJar(a)
+            cookies_txt.load()
        elif o in ('-l', '--playlist'):
            playlist = True
        elif o in ('-n', '--no-merge'):
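
How the new flag parses: the 'c:' in short_opts makes -c consume the next argv token as the cookies.txt path, which the handler above passes to MozillaCookieJar. A sketch with a hypothetical argv (assuming the long form is registered as 'cookies=' so --cookies also takes a value):

import getopt

argv = ['-c', '/tmp/cookies.txt', 'http://example.com/video']  # hypothetical
opts, args = getopt.getopt(argv, 'Vhfiuc:nSo:p:x:', ['cookies='])
for o, a in opts:
    if o in ('-c', '--cookies'):
        print('cookies file:', a)  # -> /tmp/cookies.txt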
@@ -892,7 +903,7 @@ def script_main(script_name, download, download_playlist = None):
        else:
            print(help)
            sys.exit()

    set_http_proxy(proxy)

    try: