Multithreaded downloading.

This commit is contained in:
Peter Xia 2017-02-13 18:30:32 -08:00
parent 192eb82dcf
commit 31153bdb3d

View File

@ -107,6 +107,8 @@ import time
from urllib import request, parse, error from urllib import request, parse, error
from http import cookiejar from http import cookiejar
from importlib import import_module from importlib import import_module
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
from .version import __version__ from .version import __version__
from .util import log, term from .util import log, term
@ -638,10 +640,12 @@ class SimpleProgressBar:
self.displayed = False self.displayed = False
self.total_size = total_size self.total_size = total_size
self.total_pieces = total_pieces self.total_pieces = total_pieces
self.current_piece = 1 self.current_piece = 0
self.received = 0 self.received = 0
self.speed = '' self.speed = ''
self.last_updated = time.time() self.last_updated = time.time()
self.data_lock = Lock()
self.ui_lock = Lock()
total_pieces_len = len(str(total_pieces)) total_pieces_len = len(str(total_pieces))
# 38 is the size of all statically known size in self.bar # 38 is the size of all statically known size in self.bar
@ -652,9 +656,13 @@ class SimpleProgressBar:
total_str_width, total_str, self.bar_size, total_pieces_len, total_pieces_len) total_str_width, total_str, self.bar_size, total_pieces_len, total_pieces_len)
def update(self): def update(self):
# Don't bother updating the UI if the lock cannot be acquired
if not self.ui_lock.acquire(blocking=False): return;
self.data_lock.acquire()
self.displayed = True self.displayed = True
bar_size = self.bar_size bar_size = self.bar_size
percent = round(self.received * 100 / self.total_size, 1) percent = round(self.received * 100 / self.total_size, 1)
self.data_lock.release()
if percent >= 100: if percent >= 100:
percent = 100 percent = 100
dots = bar_size * int(percent) // 100 dots = bar_size * int(percent) // 100
@ -669,8 +677,10 @@ class SimpleProgressBar:
bar = self.bar.format(percent, round(self.received / 1048576, 1), bar, self.current_piece, self.total_pieces, self.speed) bar = self.bar.format(percent, round(self.received / 1048576, 1), bar, self.current_piece, self.total_pieces, self.speed)
sys.stdout.write('\r' + bar) sys.stdout.write('\r' + bar)
sys.stdout.flush() sys.stdout.flush()
self.ui_lock.release()
def update_received(self, n): def update_received(self, n):
self.data_lock.acquire()
self.received += n self.received += n
time_diff = time.time() - self.last_updated time_diff = time.time() - self.last_updated
bytes_ps = n / time_diff if time_diff else 0 bytes_ps = n / time_diff if time_diff else 0
@ -683,15 +693,23 @@ class SimpleProgressBar:
else: else:
self.speed = '{:4.0f} B/s'.format(bytes_ps) self.speed = '{:4.0f} B/s'.format(bytes_ps)
self.last_updated = time.time() self.last_updated = time.time()
self.data_lock.release()
self.update() self.update()
def update_piece(self, n): def update_piece(self, n):
self.data_lock.acquire()
self.current_piece = n self.current_piece = n
self.data_lock.release()
def done(self): def done(self):
self.ui_lock.acquire()
self.data_lock.acquire()
if self.displayed: if self.displayed:
print() print()
self.displayed = False self.displayed = False
self.data_lock.release()
self.ui_lock.release()
class PiecesProgressBar: class PiecesProgressBar:
def __init__(self, total_size, total_pieces = 1): def __init__(self, total_size, total_pieces = 1):
@ -700,24 +718,38 @@ class PiecesProgressBar:
self.total_pieces = total_pieces self.total_pieces = total_pieces
self.current_piece = 1 self.current_piece = 1
self.received = 0 self.received = 0
self.data_lock = Lock()
self.ui_lock = Lock()
def update(self): def update(self):
self.ui_lock.acquire()
self.data_lock.acquire()
self.displayed = True self.displayed = True
self.data_lock.release()
bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('', '=' * 40, self.current_piece, self.total_pieces) bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('', '=' * 40, self.current_piece, self.total_pieces)
sys.stdout.write('\r' + bar) sys.stdout.write('\r' + bar)
sys.stdout.flush() sys.stdout.flush()
self.ui_lock.release()
def update_received(self, n): def update_received(self, n):
self.data_lock.acquire()
self.received += n self.received += n
self.data_lock.release()
self.update() self.update()
def update_piece(self, n): def update_piece(self, n):
self.data_lock.acquire()
self.current_piece = n self.current_piece = n
self.data_lock.release()
def done(self): def done(self):
self.ui_lock.acquire()
self.data_lock.acquire()
if self.displayed: if self.displayed:
print() print()
self.displayed = False self.displayed = False
self.data_lock.release()
self.ui_lock.release()
class DummyProgressBar: class DummyProgressBar:
def __init__(self, *args): def __init__(self, *args):
@ -795,13 +827,14 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
parts = [] parts = []
print('Downloading %s.%s ...' % (tr(title), ext)) print('Downloading %s.%s ...' % (tr(title), ext))
bar.update() bar.update()
for i, url in enumerate(urls): with ThreadPoolExecutor(max_workers=16) as e:
filename = '%s[%02d].%s' % (title, i, ext) for i, url in enumerate(urls):
filepath = os.path.join(output_dir, filename) filename = '%s[%02d].%s' % (title, i, ext)
parts.append(filepath) filepath = os.path.join(output_dir, filename)
#print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls)) parts.append(filepath)
bar.update_piece(i + 1) #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers) bar.update_piece(i + 1)
e.submit(url_save, url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers)
bar.done() bar.done()
if not merge: if not merge:
@ -921,13 +954,14 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
else: else:
parts = [] parts = []
print('Downloading %s.%s ...' % (tr(title), ext)) print('Downloading %s.%s ...' % (tr(title), ext))
for i, url in enumerate(urls): with ThreadPoolExecutor(max_workers=16) as e:
filename = '%s[%02d].%s' % (title, i, ext) for i, url in enumerate(urls):
filepath = os.path.join(output_dir, filename) filename = '%s[%02d].%s' % (title, i, ext)
parts.append(filepath) filepath = os.path.join(output_dir, filename)
#print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls)) parts.append(filepath)
bar.update_piece(i + 1) #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers) bar.update_piece(i + 1)
e.submit(url_save_chunked, url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers)
bar.done() bar.done()
if not merge: if not merge: