added support for downloading using aria2c

This commit is contained in:
Zhenyao She 2021-01-17 19:33:26 -05:00
parent e914a4bbbf
commit 963defae47
3 changed files with 162 additions and 7 deletions

View File

@ -137,6 +137,7 @@ cookies = None
output_filename = None output_filename = None
auto_rename = False auto_rename = False
insecure = False insecure = False
downloader = None
fake_headers = { fake_headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa
@ -624,24 +625,20 @@ def url_save(
headers=None, timeout=None, **kwargs headers=None, timeout=None, **kwargs
): ):
tmp_headers = headers.copy() if headers is not None else {} tmp_headers = headers.copy() if headers is not None else {}
# When a referer specified with param refer,
# the key must be 'Referer' for the hack here
if refer is not None: if refer is not None:
tmp_headers['Referer'] = refer tmp_headers['Referer'] = refer
if type(url) is list: if type(url) is list:
chunk_sizes = [url_size(url, faker=faker, headers=tmp_headers) for url in url] chunk_sizes = [url_size(url, faker=faker, headers=tmp_headers) for url in url]
file_size = sum(chunk_sizes) file_size = sum(chunk_sizes)
is_chunked, urls = True, url
else: else:
file_size = url_size(url, faker=faker, headers=tmp_headers) file_size = url_size(url, faker=faker, headers=tmp_headers)
chunk_sizes = [file_size]
is_chunked, urls = False, [url]
continue_renameing = True continue_renameing = True
while continue_renameing: while continue_renameing:
continue_renameing = False continue_renameing = False
if os.path.exists(filepath): if os.path.exists(filepath):
if not force and (file_size == os.path.getsize(filepath) or skip_existing_file_size_check): if not force and (file_size == os.path.getsize(filepath) \
or skip_existing_file_size_check):
if not is_part: if not is_part:
if bar: if bar:
bar.done() bar.done()
@ -684,6 +681,28 @@ def url_save(
return return
elif not os.path.exists(os.path.dirname(filepath)): elif not os.path.exists(os.path.dirname(filepath)):
os.mkdir(os.path.dirname(filepath)) os.mkdir(os.path.dirname(filepath))
global downloader
downloader(url, filepath, bar, refer=refer, is_part=is_part, faker=faker,
headers=headers, timeout=timeout, **kwargs)
def default_downloader(
url, filepath, bar, refer=None, is_part=False, faker=False,
headers=None, timeout=None, **kwargs
):
tmp_headers = headers.copy() if headers is not None else {}
# When a referer specified with param refer,
# the key must be 'Referer' for the hack here
if refer is not None:
tmp_headers['Referer'] = refer
if type(url) is list:
chunk_sizes = [url_size(url, faker=faker, headers=tmp_headers) for url in url]
file_size = sum(chunk_sizes)
is_chunked, urls = True, url
else:
file_size = url_size(url, faker=faker, headers=tmp_headers)
chunk_sizes = [file_size]
is_chunked, urls = False, [url]
temp_filepath = filepath + '.download' if file_size != float('inf') \ temp_filepath = filepath + '.download' if file_size != float('inf') \
else filepath else filepath
@ -1604,6 +1623,20 @@ def script_main(download, download_playlist, **kwargs):
parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS) parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS)
aria2c_grp = parser.add_argument_group('Aria2c options')
aria2c_grp.add_argument(
'--aria2c', action='store_true', help='Download using Aria2c'
)
aria2c_grp.add_argument(
'--aria2c-rpc', metavar='ARIA_RPC', help='''RPC link used by aria2c,
default to http://localhost:6800/jsonrpc''',
default="http://localhost:6800/jsonrpc"
)
aria2c_grp.add_argument(
'--aria2c-secret', metavar='ARIA_SECRET',
help='RPC secret authorization token'
)
args = parser.parse_args() args = parser.parse_args()
if args.help: if args.help:
@ -1627,8 +1660,11 @@ def script_main(download, download_playlist, **kwargs):
global output_filename global output_filename
global auto_rename global auto_rename
global insecure global insecure
global downloader
downloader = default_downloader
output_filename = args.output_filename output_filename = args.output_filename
extractor_proxy = args.extractor_proxy extractor_proxy = args.extractor_proxy
aria2c_options = {}
info_only = args.info info_only = args.info
if args.force: if args.force:
@ -1660,14 +1696,23 @@ def script_main(download, download_playlist, **kwargs):
# ignore ssl # ignore ssl
insecure = True insecure = True
if args.no_proxy: if args.no_proxy:
set_http_proxy('') set_http_proxy('')
else: else:
set_http_proxy(args.http_proxy) set_http_proxy(args.http_proxy)
aria2c_options["all-proxy"] = args.http_proxy
if args.socks_proxy: if args.socks_proxy:
if args.aria2c:
log.e('Aria2c does not support socket proxy')
exit(1)
set_socks_proxy(args.socks_proxy) set_socks_proxy(args.socks_proxy)
if args.aria2c:
from .processor.aria2c import Aria2cDownloader
aria = Aria2cDownloader(
args.aria2c_rpc, args.aria2c_secret, aria2c_options, log.e)
downloader = aria.download
URLs = [] URLs = []
if args.input_file: if args.input_file:
logging.debug('you are trying to load urls from %s', args.input_file) logging.debug('you are trying to load urls from %s', args.input_file)

View File

@ -4,3 +4,4 @@ from .join_flv import concat_flv
from .join_mp4 import concat_mp4 from .join_mp4 import concat_mp4
from .ffmpeg import * from .ffmpeg import *
from .rtmpdump import * from .rtmpdump import *
from .aria2c import *

View File

@ -0,0 +1,109 @@
#!/usr/bin/env python
import json
import os
import sys
from urllib import request, parse, error
import time
# Browser-like request headers. Aria2cDownloader.download() substitutes this
# mapping for the caller's headers when it is invoked with faker=True.
fake_headers = dict((
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),  # noqa
    ('Accept-Charset', 'UTF-8,*;q=0.5'),
    ('Accept-Encoding', 'gzip,deflate,sdch'),
    ('Accept-Language', 'en-US,en;q=0.8'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.74 Safari/537.36 Edg/79.0.309.43'),  # noqa
))
class Aria2cDownloader:
    """Hand downloads to a running aria2c instance over its JSON-RPC endpoint.

    ``download`` has the same call signature as the built-in downloader, so
    the bound method can be installed in its place.

    NOTE(review): the aria2 manual describes the URI array of ``aria2.addUri``
    as mirrors of one resource. When ``download`` receives a list of distinct
    video parts they are all submitted in a single ``addUri`` call -- confirm
    this is the intended behaviour for multi-part downloads.
    """

    def __init__(self, rpc, secret, options=None, error_logger=print,
                 poll_interval=1):
        """Prepare the request template shared by all RPC calls.

        :param rpc: URL of the aria2c JSON-RPC endpoint.
        :param secret: RPC secret token, or ``None`` when aria2c runs without
            one.
        :param options: mapping of aria2c option names to values that is sent
            with every download; entries whose value is ``None`` are dropped.
        :param error_logger: callable taking one message string.
        :param poll_interval: seconds to sleep between two status polls.
        """
        base = {
            'jsonrpc': '2.0',
            'id': 'you-get',
            'params': []
        }
        if secret is not None:
            # aria2 expects the secret as the first positional parameter.
            base['params'].append('token:%s' % secret)
        if options is None:
            options = {}
        # An unset value (e.g. a proxy that was never configured) carries no
        # setting, so do not forward it as JSON null.
        options = {k: v for k, v in options.items() if v is not None}
        # Both templates are kept as JSON text; every call re-parses them and
        # therefore works on a private copy it can mutate freely.
        self.base = json.dumps(base)
        self.options = json.dumps(options)
        self.rpc = rpc
        self.error_logger = error_logger
        self.poll_interval = poll_interval
        self.bar = None

    def log_error(self, msg):
        """Report ``msg`` through the configured error logger.

        While a progress bar is active a newline is written to stderr first
        so the message does not land on the bar's line; the bar reference is
        then dropped.
        """
        if self.bar:
            sys.stderr.write('\n')
            self.bar = None
        self.error_logger(msg)

    def request(self, base):
        """POST the JSON-RPC payload ``base`` and return the decoded reply.

        Any failure to reach the endpoint or an HTTP error status is logged
        and terminates the program with exit status 1 (``SystemExit``).
        """
        req = json.dumps(base).encode('utf-8')
        try:
            with request.urlopen(self.rpc, req) as r:
                return json.loads(r.read().decode('utf-8'))
        except error.HTTPError as e:
            self.log_error('Error connecting aria2c via %s' % self.rpc)
            try:
                rep = json.loads(e.read().decode('utf-8'))
                self.log_error(rep['error']['message'])
            except (ValueError, KeyError, TypeError):
                # The error body is not the expected JSON-RPC error object.
                self.log_error(str(e))
            sys.exit(1)
        except error.URLError as e:
            # Endpoint unreachable (connection refused, DNS failure, ...).
            self.log_error('Error connecting aria2c via %s' % self.rpc)
            self.log_error(str(e.reason))
            sys.exit(1)

    def download(self, urls, filepath, bar, refer=None, is_part=False,
                 faker=False, headers=None, timeout=None, **kwargs):
        """Queue ``urls`` in aria2c and block until the download finishes.

        :param urls: a single URL or a list of URLs.
        :param filepath: destination path; split into aria2's ``dir`` and
            ``out`` options.
        :param bar: progress bar offering ``update_received`` and ``done``,
            or ``None``.
        :param refer: value for aria2's ``referer`` option, if any.
        :param faker: when true, ``fake_headers`` replaces ``headers``.
        :param headers: mapping of extra HTTP headers.

        ``is_part``, ``timeout`` and extra keyword arguments are accepted for
        signature compatibility and are not used. Exits with status 1 when
        aria2 reports the download as ``removed`` or ``error``.
        """
        self.bar = bar
        download_base = json.loads(self.base)
        download_options = json.loads(self.options)
        if not isinstance(urls, list):
            urls = [urls]
        download_options['out'] = os.path.basename(filepath)
        download_options['dir'] = os.path.abspath(os.path.dirname(filepath))
        if refer is not None:
            download_options['referer'] = refer
        if faker:
            headers = fake_headers
        if headers:
            download_options['header'] = [
                '%s: %s' % (k, v) for k, v in headers.items()
            ]
        download_base['method'] = 'aria2.addUri'
        download_base['params'] += [urls, download_options]
        gid = self.request(download_base)['result']

        received = 0
        while True:
            time.sleep(self.poll_interval)
            status = self.tell_status(gid)
            state = status['status']
            if state == 'removed':
                self.log_error('download removed')
                sys.exit(1)
            if state == 'error':
                self.log_error('download failed')
                sys.exit(1)
            if bar:
                # Feed the bar before the completion check so the bytes of
                # the last poll are counted as well.
                completed = int(status['completedLength'])
                bar.update_received(completed - received)
                received = completed
            if state == 'complete':
                if bar:
                    bar.done()
                break
        self.bar = None

    def tell_status(self, gid):
        """Return aria2's ``status`` and ``completedLength`` for ``gid``."""
        base = json.loads(self.base)
        base['params'] += [gid, ['status', 'completedLength']]
        base['method'] = 'aria2.tellStatus'
        return self.request(base)['result']