refactoring

wataru 2023-04-27 23:38:25 +09:00
parent a59631609c
commit 55118815b4
13 changed files with 525 additions and 280 deletions

server/.vscode/settings.json (vendored, new file, +16)

@@ -0,0 +1,16 @@
+{
+    "workbench.colorCustomizations": {
+        "tab.activeBackground": "#65952acc"
+    },
+    "python.formatting.provider": "black",
+    "python.linting.mypyEnabled": true,
+    "[python]": {
+        "editor.defaultFormatter": null, // do not use Prettier for Python
+        "editor.formatOnSave": true
+    },
+    "flake8.args": [
+        "--ignore=E501"
+        // "--max-line-length=150",
+        // "--max-complexity=20"
+    ]
+}

server/MMVCServerSIO.py

@@ -2,12 +2,12 @@ import sys
from distutils.util import strtobool
from datetime import datetime
+from dataclasses import dataclass
+import misc.log_control
import socket
import platform
import os
import argparse
+from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
import uvicorn
from mods.ssl import create_self_signed_cert
from voice_changer.VoiceChangerManager import VoiceChangerManager
@@ -21,30 +21,48 @@ import multiprocessing as mp

def setupArgParser():
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", type=int, default=18888, help="port")
-    parser.add_argument("--https", type=strtobool,
-                        default=False, help="use https")
-    parser.add_argument("--httpsKey", type=str,
-                        default="ssl.key", help="path for the key of https")
-    parser.add_argument("--httpsCert", type=str,
-                        default="ssl.cert", help="path for the cert of https")
-    parser.add_argument("--httpsSelfSigned", type=strtobool,
-                        default=True, help="generate self-signed certificate")
-    # parser.add_argument("--internal", type=strtobool, default=False, help="各種パスをmac appの中身に変換")
-    parser.add_argument("--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)")
-    parser.add_argument("--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)")
-    parser.add_argument("--content_vec_500_onnx_on", type=strtobool, default=False, help="use or not onnx for content_vec_500")
-    parser.add_argument("--hubert_base", type=str, help="path to hubert_base model(pytorch)")
-    parser.add_argument("--hubert_soft", type=str, help="path to hubert_soft model(pytorch)")
-    parser.add_argument("--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)")
+    parser.add_argument("--https", type=strtobool, default=False, help="use https")
+    parser.add_argument(
+        "--httpsKey", type=str, default="ssl.key", help="path for the key of https"
+    )
+    parser.add_argument(
+        "--httpsCert", type=str, default="ssl.cert", help="path for the cert of https"
+    )
+    parser.add_argument(
+        "--httpsSelfSigned",
+        type=strtobool,
+        default=True,
+        help="generate self-signed certificate",
+    )
+    parser.add_argument(
+        "--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)"
+    )
+    parser.add_argument(
+        "--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)"
+    )
+    parser.add_argument(
+        "--content_vec_500_onnx_on",
+        type=strtobool,
+        default=False,
+        help="use or not onnx for content_vec_500",
+    )
+    parser.add_argument(
+        "--hubert_base", type=str, help="path to hubert_base model(pytorch)"
+    )
+    parser.add_argument(
+        "--hubert_soft", type=str, help="path to hubert_soft model(pytorch)"
+    )
+    parser.add_argument(
+        "--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)"
+    )
    return parser

def printMessage(message, level=0):
    pf = platform.system()
-    if pf == 'Windows':
+    if pf == "Windows":
        if level == 0:
            print(f"{message}")
        elif level == 1:
@@ -78,37 +96,38 @@ def localServer():
        host="0.0.0.0",
        port=int(PORT),
        reload=False if hasattr(sys, "_MEIPASS") else True,
-        log_level="warning"
+        log_level="warning",
    )

-if __name__ == 'MMVCServerSIO':
-    voiceChangerManager = VoiceChangerManager.get_instance({
-        "content_vec_500": args.content_vec_500,
-        "content_vec_500_onnx": args.content_vec_500_onnx,
-        "content_vec_500_onnx_on": args.content_vec_500_onnx_on,
-        "hubert_base": args.hubert_base,
-        "hubert_soft": args.hubert_soft,
-        "nsf_hifigan": args.nsf_hifigan,
-    })
+if __name__ == "MMVCServerSIO":
+    voiceChangerParams = VoiceChangerParams(
+        content_vec_500=args.content_vec_500,
+        content_vec_500_onnx=args.content_vec_500_onnx,
+        content_vec_500_onnx_on=args.content_vec_500_onnx_on,
+        hubert_base=args.hubert_base,
+        hubert_soft=args.hubert_soft,
+        nsf_hifigan=args.nsf_hifigan,
+    )
+    voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
+    print("voiceChangerManager", voiceChangerManager)
    app_fastapi = MMVC_Rest.get_instance(voiceChangerManager)
    app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager)

-if __name__ == '__mp_main__':
-    printMessage(f"サーバプロセスを起動しています。", level=2)
+if __name__ == "__mp_main__":
+    printMessage("サーバプロセスを起動しています。", level=2)

-if __name__ == '__main__':
+if __name__ == "__main__":
    mp.freeze_support()
-    printMessage(f"Voice Changerを起動しています。", level=2)
+    printMessage("Voice Changerを起動しています。", level=2)
    PORT = args.p

    if os.getenv("EX_PORT"):
        EX_PORT = os.environ["EX_PORT"]
-        printMessage(
-            f"External_Port:{EX_PORT} Internal_Port:{PORT}", level=1)
+        printMessage(f"External_Port:{EX_PORT} Internal_Port:{PORT}", level=1)
    else:
        printMessage(f"Internal_Port:{PORT}", level=1)
@@ -123,38 +142,42 @@ if __name__ == '__main__':
        key_base_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        keyname = f"{key_base_name}.key"
        certname = f"{key_base_name}.cert"
-        create_self_signed_cert(certname, keyname, certargs={"Country": "JP",
-                                                             "State": "Tokyo",
-                                                             "City": "Chuo-ku",
-                                                             "Organization": "F",
-                                                             "Org. Unit": "F"}, cert_dir=SSL_KEY_DIR)
+        create_self_signed_cert(
+            certname,
+            keyname,
+            certargs={
+                "Country": "JP",
+                "State": "Tokyo",
+                "City": "Chuo-ku",
+                "Organization": "F",
+                "Org. Unit": "F",
+            },
+            cert_dir=SSL_KEY_DIR,
+        )
        key_path = os.path.join(SSL_KEY_DIR, keyname)
        cert_path = os.path.join(SSL_KEY_DIR, certname)
-        printMessage(
-            f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
+        printMessage(
+            f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1
+        )
    elif args.https and args.httpsSelfSigned == 0:
        # HTTPS
        key_path = args.httpsKey
        cert_path = args.httpsCert
-        printMessage(
-            f"protocol: HTTPS, key:{key_path}, cert:{cert_path}", level=1)
+        printMessage(f"protocol: HTTPS, key:{key_path}, cert:{cert_path}", level=1)
    else:
        # HTTP
-        printMessage(f"protocol: HTTP", level=1)
-    printMessage(f"-- ---- -- ", level=1)
+        printMessage("protocol: HTTP", level=1)
+    printMessage("-- ---- -- ", level=1)

    # アドレス表示
-    printMessage(
-        f"ブラウザで次のURLを開いてください.", level=2)
+    printMessage("ブラウザで次のURLを開いてください.", level=2)
    if args.https == 1:
-        printMessage(
-            f"https://<IP>:<PORT>/", level=1)
+        printMessage("https://<IP>:<PORT>/", level=1)
    else:
-        printMessage(
-            f"http://<IP>:<PORT>/", level=1)
-    printMessage(f"多くの場合は次のいずれかのURLにアクセスすると起動します。", level=2)
+        printMessage("http://<IP>:<PORT>/", level=1)
+    printMessage("多くの場合は次のいずれかのURLにアクセスすると起動します。", level=2)

    if "EX_PORT" in locals() and "EX_IP" in locals():  # シェルスクリプト経由起動(docker)
        if args.https == 1:
            printMessage(f"https://localhost:{EX_PORT}/", level=1)
@@ -175,7 +198,7 @@ if __name__ == '__main__':
        # サーバ起動
        if args.https:
            # HTTPS サーバ起動
-            res = uvicorn.run(
+            uvicorn.run(
                f"{os.path.basename(__file__)[:-3]}:app_socketio",
                host="0.0.0.0",
                port=int(PORT),
@@ -188,13 +211,17 @@ if __name__ == '__main__':
            p = mp.Process(name="p", target=localServer)
            p.start()
            try:
-                if sys.platform.startswith('win'):
-                    process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"])
+                if sys.platform.startswith("win"):
+                    process = subprocess.Popen(
+                        [NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"]
+                    )
                    return_code = process.wait()
                    print("client closed.")
                    p.terminate()
-                elif sys.platform.startswith('darwin'):
-                    process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"])
+                elif sys.platform.startswith("darwin"):
+                    process = subprocess.Popen(
+                        [NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"]
+                    )
                    return_code = process.wait()
                    print("client closed.")
                    p.terminate()

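Note: the VoiceChangerParams object constructed above replaces the plain dict that was previously passed to VoiceChangerManager.get_instance. Its definition (voice_changer/utils/VoiceChangerParams.py) is not shown in this diff; a minimal sketch consistent with the call site, with all field types assumed:

from dataclasses import dataclass


@dataclass
class VoiceChangerParams:
    # Paths to embedder/vocoder weights handed in via the CLI args above (types assumed)
    content_vec_500: str
    content_vec_500_onnx: str
    content_vec_500_onnx_on: bool
    hubert_base: str
    hubert_soft: str
    nsf_hifigan: str

Downstream code then reads attributes instead of dict keys, e.g. self.params.hubert_base in RVC.py below.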
server/const.py

@@ -26,14 +26,6 @@ TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else
os.makedirs(TMP_DIR, exist_ok=True)

-# modelType: ModelType = "MMVCv15"
-# def getModelType() -> ModelType:
-#     return modelType
-# def setModelType(_modelType: ModelType):
-#     global modelType
-#     modelType = _modelType

def getFrontendPath():
    frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
    return frontend_path

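getFrontendPath switches between the PyInstaller bundle and the development tree: when the app is frozen, PyInstaller unpacks bundled files under sys._MEIPASS. A generic sketch of the same pattern (helper name is mine, not from the repo):

import os
import sys


def resource_path(bundled: str, dev: str) -> str:
    # Frozen build: files live under the PyInstaller extraction dir.
    if hasattr(sys, "_MEIPASS"):
        return os.path.join(sys._MEIPASS, bundled)
    # Development: use the path in the source tree.
    return dev


# getFrontendPath() above is then equivalent to:
# resource_path("dist", "../client/demo/dist")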
server/restapi/MMVC_Rest.py

@@ -1,7 +1,8 @@
-from fastapi import FastAPI, Request, Response
+from fastapi import FastAPI, Request, Response, HTTPException
from fastapi.routing import APIRoute
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
+from fastapi.exceptions import RequestValidationError
from typing import Callable

from voice_changer.VoiceChangerManager import VoiceChangerManager
@@ -18,7 +19,7 @@ class ValidationErrorLoggingRoute(APIRoute):
        async def custom_route_handler(request: Request) -> Response:
            try:
                return await original_route_handler(request)
-            except Exception as exc:
+            except RequestValidationError as exc:
                print("Exception", request.url, str(exc))
                body = await request.body()
                detail = {"errors": exc.errors(), "body": body.decode()}
@@ -28,10 +29,11 @@ class ValidationErrorLoggingRoute(APIRoute):

class MMVC_Rest:
+    _instance = None
    @classmethod
    def get_instance(cls, voiceChangerManager: VoiceChangerManager):
-        if not hasattr(cls, "_instance"):
+        if cls._instance is None:
            app_fastapi = FastAPI()
            app_fastapi.router.route_class = ValidationErrorLoggingRoute
            app_fastapi.add_middleware(
@@ -43,15 +45,25 @@ class MMVC_Rest:
            )

            app_fastapi.mount(
-                "/front", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
+                "/front",
+                StaticFiles(directory=f"{getFrontendPath()}", html=True),
+                name="static",
+            )

            app_fastapi.mount(
-                "/trainer", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
+                "/trainer",
+                StaticFiles(directory=f"{getFrontendPath()}", html=True),
+                name="static",
+            )

            app_fastapi.mount(
-                "/recorder", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
+                "/recorder",
+                StaticFiles(directory=f"{getFrontendPath()}", html=True),
+                name="static",
+            )

            app_fastapi.mount(
-                "/tmp", StaticFiles(directory=f'{TMP_DIR}'), name="static")
+                "/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static"
+            )

            restHello = MMVC_Rest_Hello()
            app_fastapi.include_router(restHello.router)

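The change from hasattr(cls, "_instance") to an explicit _instance = None class attribute is the conventional class-level singleton, and the explicit attribute is also visible to mypy (enabled in the new .vscode/settings.json above). Reduced to its core, the pattern looks like this (a sketch, not the full MMVC_Rest code):

class Singleton:
    _instance = None  # explicit sentinel instead of a hasattr() probe

    @classmethod
    def get_instance(cls):
        if cls._instance is None:
            cls._instance = cls()  # build and cache on first call
        return cls._instance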
server/restapi/MMVC_Rest_Fileuploader.py

@@ -4,12 +4,13 @@ from typing import Union
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
-from fastapi import HTTPException, FastAPI, UploadFile, File, Form
+from fastapi import UploadFile, File, Form

from restapi.mods.FileUploader import upload_file, concat_file_chunks
from voice_changer.VoiceChangerManager import VoiceChangerManager

from const import MODEL_DIR, UPLOAD_DIR, ModelType

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
@@ -19,12 +20,22 @@ class MMVC_Rest_Fileuploader:
        self.voiceChangerManager = voiceChangerManager
        self.router = APIRouter()
        self.router.add_api_route("/info", self.get_info, methods=["GET"])
-        self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
-        self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
-        self.router.add_api_route("/update_settings", self.post_update_settings, methods=["POST"])
+        self.router.add_api_route(
+            "/upload_file", self.post_upload_file, methods=["POST"]
+        )
+        self.router.add_api_route(
+            "/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"]
+        )
+        self.router.add_api_route(
+            "/update_settings", self.post_update_settings, methods=["POST"]
+        )
        self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
-        self.router.add_api_route("/load_model_for_train", self.post_load_model_for_train, methods=["POST"])
-        self.router.add_api_route("/extract_voices", self.post_extract_voices, methods=["POST"])
+        self.router.add_api_route(
+            "/load_model_for_train", self.post_load_model_for_train, methods=["POST"]
+        )
+        self.router.add_api_route(
+            "/extract_voices", self.post_extract_voices, methods=["POST"]
+        )
        self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
        self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
        self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
@@ -34,9 +45,13 @@ class MMVC_Rest_Fileuploader:
        json_compatible_item_data = jsonable_encoder(res)
        return JSONResponse(content=json_compatible_item_data)

-    def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
+    def post_concat_uploaded_file(
+        self, filename: str = Form(...), filenameChunkNum: int = Form(...)
+    ):
        slot = 0
-        res = concat_file_chunks(slot, UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
+        res = concat_file_chunks(
+            slot, UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR
+        )
        json_compatible_item_data = jsonable_encoder(res)
        return JSONResponse(content=json_compatible_item_data)
@@ -45,7 +60,9 @@ class MMVC_Rest_Fileuploader:
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)

-    def post_update_settings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
+    def post_update_settings(
+        self, key: str = Form(...), val: Union[int, str, float] = Form(...)
+    ):
        print("post_update_settings", key, val)
        info = self.voiceChangerManager.update_settings(key, val)
        json_compatible_item_data = jsonable_encoder(info)
@@ -63,7 +80,6 @@ class MMVC_Rest_Fileuploader:
        isHalf: bool = Form(...),
        params: str = Form(...),
    ):
-
        props = {
            "slot": slot,
            "isHalf": isHalf,
@@ -73,9 +89,9 @@ class MMVC_Rest_Fileuploader:
                "onnxModelFilename": onnxModelFilename,
                "clusterTorchModelFilename": clusterTorchModelFilename,
                "featureFilename": featureFilename,
-                "indexFilename": indexFilename
+                "indexFilename": indexFilename,
            },
-            "params": params
+            "params": params,
        }
        # Change Filepath
        for key, val in props["files"].items():
@@ -103,9 +119,11 @@ class MMVC_Rest_Fileuploader:
        modelDFilenameChunkNum: int = Form(...),
    ):
        modelGFilePath = concat_file_chunks(
-            UPLOAD_DIR, modelGFilename, modelGFilenameChunkNum, MODEL_DIR)
+            UPLOAD_DIR, modelGFilename, modelGFilenameChunkNum, MODEL_DIR
+        )
        modelDFilePath = concat_file_chunks(
-            UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR)
+            UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR
+        )
        return {"File saved": f"{modelGFilePath}, {modelDFilePath}"}

    def post_extract_voices(
@@ -114,7 +132,8 @@ class MMVC_Rest_Fileuploader:
        zipFileChunkNum: int = Form(...),
    ):
        zipFilePath = concat_file_chunks(
-            UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR)
+            UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR
+        )
        shutil.unpack_archive(zipFilePath, "MMVC_Trainer/dataset/textful/")
        return {"Zip file unpacked": f"{zipFilePath}"}

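These routes implement a chunked upload: the client posts numbered chunks to /upload_file, then asks /concat_uploaded_file to join them. The Form fields for /concat_uploaded_file (filename, filenameChunkNum) are visible above; the field names for /upload_file are not shown in this diff, so they are assumed in this hypothetical client sketch (requests library assumed installed):

import requests

BASE = "http://localhost:18888"  # default port from MMVCServerSIO.py
CHUNK = 1024 * 1024


def upload_in_chunks(path: str, name: str) -> None:
    num_chunks = 0
    with open(path, "rb") as f:
        while True:
            chunk = f.read(CHUNK)
            if not chunk:
                break
            # The "file" field name and "<name>_<n>" chunk naming are assumptions.
            requests.post(
                f"{BASE}/upload_file",
                files={"file": (f"{name}_{num_chunks}", chunk)},
            )
            num_chunks += 1
    requests.post(
        f"{BASE}/concat_uploaded_file",
        data={"filename": name, "filenameChunkNum": num_chunks},
    )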
server/restapi/MMVC_Rest_Hello.py

@@ -1,6 +1,6 @@
from fastapi import APIRouter
-from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse

class MMVC_Rest_Hello:
    def __init__(self):
        self.router = APIRouter()
@@ -8,6 +8,3 @@ class MMVC_Rest_Hello:
    def hello(self):
        return {"result": "Index"}

server/restapi/MMVC_Rest_VoiceChanger.py

@@ -31,24 +31,24 @@ class MMVC_Rest_VoiceChanger:
        buffer = voice.buffer
        wav = base64.b64decode(buffer)

-        if wav == 0:
-            samplerate, data = read("dummy.wav")
-            unpackedData = data
-        else:
-            unpackedData = np.array(struct.unpack(
-                '<%sh' % (len(wav) // struct.calcsize('<h')), wav))
-            # write("logs/received_data.wav", 24000,
-            #       unpackedData.astype(np.int16))
+        # if wav == 0:
+        #     samplerate, data = read("dummy.wav")
+        #     unpackedData = data
+        # else:
+        #     unpackedData = np.array(
+        #         struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav)
+        #     )
+        unpackedData = np.array(
+            struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav)
+        )

        self.tlock.acquire()
        changedVoice = self.voiceChangerManager.changeVoice(unpackedData)
        self.tlock.release()

-        changedVoiceBase64 = base64.b64encode(changedVoice[0]).decode('utf-8')
-        data = {
-            "timestamp": timestamp,
-            "changedVoiceBase64": changedVoiceBase64
-        }
+        changedVoiceBase64 = base64.b64encode(changedVoice[0]).decode("utf-8")
+        data = {"timestamp": timestamp, "changedVoiceBase64": changedVoiceBase64}

        json_compatible_item_data = jsonable_encoder(data)
        return JSONResponse(content=json_compatible_item_data)

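The handler above expects the audio as base64-encoded little-endian int16 PCM; struct.calcsize("<h") is 2, so the format string unpacks the whole buffer as shorts. A round-trip sketch of that encoding (self-contained and runnable):

import base64
import struct

import numpy as np

# Client side: int16 PCM -> base64 string (what ends up in voice.buffer)
pcm = (np.sin(np.linspace(0, 100, 4800)) * 8000).astype(np.int16)
buffer = base64.b64encode(pcm.tobytes()).decode("utf-8")

# Server side: the same decode as in the handler above
wav = base64.b64decode(buffer)
unpackedData = np.array(struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav))
assert (unpackedData == pcm).all()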
server/voice_changer/IOAnalyzer.py (deleted)

@@ -1,40 +0,0 @@
-import os
-import numpy as np
-import pylab
-import librosa
-import librosa.display
-import pyworld as pw

-class IOAnalyzer:
-    def _get_f0_dio(self, y, sr):
-        _f0, time = pw.dio(y, sr, frame_period=5)
-        f0 = pw.stonemask(y, _f0, time, sr)
-        time = np.linspace(0, y.shape[0] / sr, len(time))
-        return f0, time

-    def _get_f0_harvest(self, y, sr):
-        _f0, time = pw.harvest(y, sr, frame_period=5)
-        f0 = pw.stonemask(y, _f0, time, sr)
-        time = np.linspace(0, y.shape[0] / sr, len(time))
-        return f0, time

-    def analyze(self, inputDataFile: str, dioImageFile: str, harvestImageFile: str, samplingRate: int):
-        y, sr = librosa.load(inputDataFile, samplingRate)
-        y = y.astype(np.float64)
-        spec = librosa.amplitude_to_db(np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=128)), ref=np.max)
-        f0_dio, times = self._get_f0_dio(y, sr=samplingRate)
-        f0_harvest, times = self._get_f0_harvest(y, sr=samplingRate)

-        pylab.close()
-        HOP_LENGTH = 128
-        img = librosa.display.specshow(spec, sr=samplingRate, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
-        pylab.plot(times, f0_dio, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
-        pylab.savefig(dioImageFile)

-        pylab.close()
-        HOP_LENGTH = 128
-        img = librosa.display.specshow(spec, sr=samplingRate, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
-        pylab.plot(times, f0_harvest, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
-        pylab.savefig(harvestImageFile)

server/voice_changer/RVC/ModelWrapper.py

@@ -30,13 +30,15 @@ class ModelWrapper:
            self.embChannels = metadata["embChannels"]
            self.modelType = metadata["modelType"]
            self.deprecated = False
-            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}")
+            self.embedder = metadata["embedder"] if "embedder" in metadata else "hubert_base"
+            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}, embedder:{self.embedder}")
        except:
            self.samplingRate = 48000
            self.f0 = True
            self.embChannels = 256
            self.modelType = 0
            self.deprecated = True
+            self.embedder = "hubert_base"
            print(f"[Voice Changer] ############## !!!! CAUTION !!!! ####################")
            print(f"[Voice Changer] This onnx's version is depricated. Please regenerate onnxfile. Fallback to default")
            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}")
@@ -57,6 +59,9 @@ class ModelWrapper:
    def getDeprecated(self):
        return self.deprecated

+    def getEmbedder(self):
+        return self.embedder
+
    def set_providers(self, providers, provider_options=[{}]):
        self.onnx_session.set_providers(providers=providers, provider_options=provider_options)

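ModelWrapper now reads an embedder entry from the exported ONNX metadata, falling back to hubert_base for models exported before this commit. Assuming the key/value pairs travel in the ONNX custom metadata map (which is how onnxruntime exposes freeform model metadata), reading it back looks roughly like this:

import onnxruntime

session = onnxruntime.InferenceSession(
    "model_simple.onnx", providers=["CPUExecutionProvider"]
)
meta = session.get_modelmeta().custom_metadata_map  # dict[str, str]
embedder = meta.get("embedder", "hubert_base")  # same fallback as ModelWrapper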
server/voice_changer/RVC/RVC.py

@@ -4,11 +4,12 @@ import json
import resampy
from voice_changer.RVC.ModelWrapper import ModelWrapper
from Exceptions import NoModeLoadedException
+from voice_changer.utils.VoiceChangerParams import VoiceChangerParams

# avoiding parse arg error in RVC
sys.argv = ["MMVCServerSIO.py"]

-if sys.platform.startswith('darwin'):
+if sys.platform.startswith("darwin"):
    baseDir = [x for x in sys.path if x.endswith("Contents/MacOS")]
    if len(baseDir) != 1:
        print("baseDir should be only one ", baseDir)
@@ -24,6 +25,7 @@ from functools import reduce
import numpy as np
import torch
import onnxruntime
+
# onnxruntime.set_default_logger_severity(3)
from const import HUBERT_ONNX_MODEL_PATH, TMP_DIR
@@ -36,11 +38,17 @@ from .models import SynthesizerTrnMsNSFsidNono as SynthesizerTrnMsNSFsidNono_webui
from .const import RVC_MODEL_TYPE_RVC, RVC_MODEL_TYPE_WEBUI
from fairseq import checkpoint_utils

-providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
+providers = [
+    "OpenVINOExecutionProvider",
+    "CUDAExecutionProvider",
+    "DmlExecutionProvider",
+    "CPUExecutionProvider",
+]


@dataclass
-class ModelSlot():
+class ModelSlot:
    pyTorchModelFile: str = ""
    onnxModelFile: str = ""
    featureFile: str = ""
@@ -51,13 +59,11 @@ class ModelSlot():
    f0: bool = True
    embChannels: int = 256
    deprecated: bool = False
-    # samplingRateOnnx: int = -1
-    # f0Onnx: bool = True
-    # embChannelsOnnx: int = 256
+    embedder: str = "hubert_base"  # "hubert_base", "contentvec", "distilhubert"


@dataclass
-class RVCSettings():
+class RVCSettings:
    gpu: int = 0
    dstId: int = 0
@@ -72,9 +78,7 @@ class RVCSettings():
    onnxModelFile: str = ""
    configFile: str = ""
    modelSlots: list[ModelSlot] = field(
-        default_factory=lambda: [
-            ModelSlot(), ModelSlot(), ModelSlot()
-        ]
+        default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot()]
    )
    indexRatio: float = 0
    rvcQuality: int = 0
@@ -82,23 +86,28 @@ class RVCSettings():
    modelSamplingRate: int = 48000
    modelSlotIndex: int = -1

-    speakers: dict[str, int] = field(
-        default_factory=lambda: {}
-    )
+    speakers: dict[str, int] = field(default_factory=lambda: {})

    # ↓mutableな物だけ列挙
-    intData = ["gpu", "dstId", "tran", "extraConvertSize", "rvcQuality", "modelSamplingRate", "silenceFront", "modelSlotIndex"]
+    intData = [
+        "gpu",
+        "dstId",
+        "tran",
+        "extraConvertSize",
+        "rvcQuality",
+        "modelSamplingRate",
+        "silenceFront",
+        "modelSlotIndex",
+    ]
    floatData = ["silentThreshold", "indexRatio"]
    strData = ["framework", "f0Detector"]


class RVC:
-    def __init__(self, params):
+    def __init__(self, params: VoiceChangerParams):
        self.initialLoad = True
        self.settings = RVCSettings()
+        self.inferenceing: bool = False

        self.net_g = None
        self.onnx_session = None
        self.feature_file = None
@@ -108,7 +117,10 @@ class RVC:
        self.prevVol = 0
        self.params = params
-        self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
+        self.mps_enabled: bool = (
+            getattr(torch.backends, "mps", None) is not None
+            and torch.backends.mps.is_available()
+        )
        self.currentSlot = -1
        print("RVC initialization: ", params)
        print("mps: ", self.mps_enabled)
@@ -120,26 +132,41 @@ class RVC:
        params = json.loads(params_str)

        newSlot = asdict(self.settings.modelSlots[tmp_slot])
-        newSlot.update({
-            "pyTorchModelFile": props["files"]["pyTorchModelFilename"],
-            "onnxModelFile": props["files"]["onnxModelFilename"],
-            "featureFile": props["files"]["featureFilename"],
-            "indexFile": props["files"]["indexFilename"],
-            "defaultTrans": params["trans"]
-        })
+        newSlot.update(
+            {
+                "pyTorchModelFile": props["files"]["pyTorchModelFilename"],
+                "onnxModelFile": props["files"]["onnxModelFilename"],
+                "featureFile": props["files"]["featureFilename"],
+                "indexFile": props["files"]["indexFilename"],
+                "defaultTrans": params["trans"],
+            }
+        )
        self.settings.modelSlots[tmp_slot] = ModelSlot(**newSlot)
        print("[Voice Changer] RVC loading... slot:", tmp_slot)

        # Load metadata
-        if self.settings.modelSlots[tmp_slot].pyTorchModelFile != None and self.settings.modelSlots[tmp_slot].pyTorchModelFile != "":
-            self._setInfoByPytorch(tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile)
-        if self.settings.modelSlots[tmp_slot].onnxModelFile != None and self.settings.modelSlots[tmp_slot].onnxModelFile != "":
-            self._setInfoByONNX(tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile)
+        if (
+            self.settings.modelSlots[tmp_slot].pyTorchModelFile != None
+            and self.settings.modelSlots[tmp_slot].pyTorchModelFile != ""
+        ):
+            self._setInfoByPytorch(
+                tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile
+            )
+        if (
+            self.settings.modelSlots[tmp_slot].onnxModelFile != None
+            and self.settings.modelSlots[tmp_slot].onnxModelFile != ""
+        ):
+            self._setInfoByONNX(
+                tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile
+            )

        try:
-            hubert_path = self.params["hubert_base"]
-            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
+            hubert_path = self.params.hubert_base
+            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
+                [hubert_path],
+                suffix="",
+            )
            model = models[0]
            model.eval()
            if self.is_half:
@@ -164,13 +191,21 @@ class RVC:
        if config_len == 18:
            self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_RVC
            self.settings.modelSlots[slot].embChannels = 256
+            self.settings.modelSlots[slot].embedder = "hubert_base"
        else:
            self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_WEBUI
            self.settings.modelSlots[slot].embChannels = cpt["config"][17]
+            self.settings.modelSlots[slot].embedder = cpt["embedder_name"]
+            if self.settings.modelSlots[slot].embedder.endswith("768"):
+                self.settings.modelSlots[slot].embedder = self.settings.modelSlots[
+                    slot
+                ].embedder[:-3]
+            print("embedder....", self.settings.modelSlots[slot].embedder)

        self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
        self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]
-        self.settings.modelSamplingRate = cpt["config"][-1]
+        # self.settings.modelSamplingRate = cpt["config"][-1]

    def _setInfoByONNX(self, slot, file):
        tmp_onnx_session = ModelWrapper(file)
@@ -179,6 +214,8 @@ class RVC:
        self.settings.modelSlots[slot].f0 = tmp_onnx_session.getF0()
        self.settings.modelSlots[slot].samplingRate = tmp_onnx_session.getSamplingRate()
        self.settings.modelSlots[slot].deprecated = tmp_onnx_session.getDeprecated()
+        self.settings.modelSlots[slot].embedder = tmp_onnx_session.getEmbedder()
+        print("embedder....", self.settings.modelSlots[slot].embedder)

    def prepareModel(self, slot: int):
        print("[Voice Changer] Prepare Model of slot:", slot)
@@ -188,7 +225,7 @@ class RVC:
        if pyTorchModelFile != None and pyTorchModelFile != "":
            print("[Voice Changer] Loading Pytorch Model...")
            cpt = torch.load(pyTorchModelFile, map_location="cpu")
-            '''
+            """
            (1) オリジナルとrvc-webuiのモデル判定 config全体の形状
            ノーマル256
            [1025, 32, 192, 192, 768, 2, 6, 3, 0, '1', [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 6, 2, 2, 2], 512, [16, 16, 4, 4, 4], 109, 256, 48000]
@@ -200,32 +237,32 @@ class RVC:
            0: ピッチレス, 1:ノーマル
            (2-2) rvc-webuiの(256 or 768) x (ノーマルor pitchレス)判定 256, or 768 は17番目の要素で判定, ノーマルor pitchレスはckp["f0"]で判定
-            '''
+            """
-            # config_len = len(cpt["config"])
-            # if config_len == 18:
-            #     self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_RVC
-            #     self.settings.modelSlots[slot].embChannels = 256
-            # else:
-            #     self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_WEBUI
-            #     self.settings.modelSlots[slot].embChannels = cpt["config"][17]
-            # self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
-            # self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]
-            # self.settings.modelSamplingRate = cpt["config"][-1]

-            if self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC and self.settings.modelSlots[slot].f0 == True:
+            if (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
+                and self.settings.modelSlots[slot].f0 == True
+            ):
                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
-            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC and self.settings.modelSlots[slot].f0 == False:
+            elif (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
+                and self.settings.modelSlots[slot].f0 == False
+            ):
                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
-            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI and self.settings.modelSlots[slot].f0 == True:
-                net_g = SynthesizerTrnMsNSFsid_webui(**cpt["params"], is_half=self.is_half)
-            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI and self.settings.modelSlots[slot].f0 == False:
-                ######################
-                # TBD
-                ######################
-                print("webui non-f0 is not supported yet")
-                net_g = SynthesizerTrnMsNSFsidNono_webui(**cpt["params"], is_half=self.is_half)
+            elif (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
+                and self.settings.modelSlots[slot].f0 == True
+            ):
+                net_g = SynthesizerTrnMsNSFsid_webui(
+                    **cpt["params"], is_half=self.is_half
+                )
+            elif (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
+                and self.settings.modelSlots[slot].f0 == False
+            ):
+                net_g = SynthesizerTrnMsNSFsidNono_webui(
+                    **cpt["params"], is_half=self.is_half
+                )
            else:
                print("unknwon")
@@ -259,11 +296,15 @@ class RVC:
        self.next_trans = self.settings.modelSlots[slot].defaultTrans
        self.next_samplingRate = self.settings.modelSlots[slot].samplingRate
        self.next_framework = "ONNX" if self.next_onnx_session != None else "PyTorch"
-        print("[Voice Changer] Prepare done.",)
+        print(
+            "[Voice Changer] Prepare done.",
+        )
        return self.get_info()

    def switchModel(self):
-        print("[Voice Changer] Switching model..",)
+        print(
+            "[Voice Changer] Switching model..",
+        )
        # del self.net_g
        # del self.onnx_session
        self.net_g = self.next_net_g
@@ -275,17 +316,23 @@ class RVC:
        self.settings.modelSamplingRate = self.next_samplingRate
        self.next_net_g = None
        self.next_onnx_session = None
-        print("[Voice Changer] Switching model..done",)
+        print(
+            "[Voice Changer] Switching model..done",
+        )

    def update_settings(self, key: str, val: any):
        if key == "onnxExecutionProvider" and self.onnx_session != None:
            if val == "CUDAExecutionProvider":
                if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
                    self.settings.gpu = 0
-                provider_options = [{'device_id': self.settings.gpu}]
-                self.onnx_session.set_providers(providers=[val], provider_options=provider_options)
+                provider_options = [{"device_id": self.settings.gpu}]
+                self.onnx_session.set_providers(
+                    providers=[val], provider_options=provider_options
+                )
                if hasattr(self, "hubert_onnx"):
-                    self.hubert_onnx.set_providers(providers=[val], provider_options=provider_options)
+                    self.hubert_onnx.set_providers(
+                        providers=[val], provider_options=provider_options
+                    )
            else:
                self.onnx_session.set_providers(providers=[val])
                if hasattr(self, "hubert_onnx"):
@@ -294,12 +341,20 @@ class RVC:
            print("Onnx is not enabled. Please load model.")
            return False
        elif key in self.settings.intData:
-            if key == "gpu" and val >= 0 and val < self.gpu_num and self.onnx_session != None:
+            if (
+                key == "gpu"
+                and val >= 0
+                and val < self.gpu_num
+                and self.onnx_session != None
+            ):
                providers = self.onnx_session.get_providers()
                print("Providers:", providers)
                if "CUDAExecutionProvider" in providers:
-                    provider_options = [{'device_id': self.settings.gpu}]
-                    self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
+                    provider_options = [{"device_id": self.settings.gpu}]
+                    self.onnx_session.set_providers(
+                        providers=["CUDAExecutionProvider"],
+                        provider_options=provider_options,
+                    )
            if key == "modelSlotIndex":
                # self.switchModel(int(val))
                val = int(val) % 1000  # Quick hack for same slot is selected
@@ -318,7 +373,9 @@ class RVC:
    def get_info(self):
        data = asdict(self.settings)

-        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session != None else []
+        data["onnxExecutionProviders"] = (
+            self.onnx_session.get_providers() if self.onnx_session != None else []
+        )
        files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
        for f in files:
            if data[f] != None and os.path.exists(data[f]):
@@ -331,22 +388,30 @@ class RVC:
    def get_processing_sampling_rate(self):
        return self.settings.modelSamplingRate

-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
+    def generate_input(
+        self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0
+    ):
        newData = newData.astype(np.float32) / 32768.0

        if hasattr(self, "audio_buffer"):
-            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # 過去のデータに連結
+            self.audio_buffer = np.concatenate(
+                [self.audio_buffer, newData], 0
+            )  # 過去のデータに連結
        else:
            self.audio_buffer = newData

-        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
+        convertSize = (
+            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
+        )

        if convertSize % 128 != 0:  # モデルの出力のホップサイズで切り捨てが発生するので補う。
            convertSize = convertSize + (128 - (convertSize % 128))

        self.audio_buffer = self.audio_buffer[-1 * convertSize :]  # 変換対象の部分だけ抽出

-        crop = self.audio_buffer[-1 * (inputSize + crossfadeSize):-1 * (crossfadeSize)]  # 出力部分だけ切り出して音量を確認。(solaとの関係性について、現状は無考慮)
+        crop = self.audio_buffer[
+            -1 * (inputSize + crossfadeSize) : -1 * (crossfadeSize)
+        ]  # 出力部分だけ切り出して音量を確認。(solaとの関係性について、現状は無考慮)

        rms = np.sqrt(np.square(crop).mean(axis=0))
        vol = max(rms, self.prevVol * 0.0)
        self.prevVol = vol
@@ -390,15 +455,34 @@ class RVC:
            f0 = self.settings.modelSlots[self.currentSlot].f0
            embChannels = self.settings.modelSlots[self.currentSlot].embChannels

-            audio_out = vc.pipeline(self.hubert_model, self.onnx_session, sid, audio, times, f0_up_key, f0_method,
-                                    file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file, silence_front=self.settings.extraConvertSize / self.settings.modelSamplingRate, embChannels=embChannels)
+            audio_out = vc.pipeline(
+                self.hubert_model,
+                self.onnx_session,
+                sid,
+                audio,
+                times,
+                f0_up_key,
+                f0_method,
+                file_index,
+                file_big_npy,
+                index_rate,
+                if_f0,
+                f0_file=f0_file,
+                silence_front=self.settings.extraConvertSize
+                / self.settings.modelSamplingRate,
+                embChannels=embChannels,
+            )
            result = audio_out * np.sqrt(vol)

        return result

    def _pyTorch_inference(self, data):
        if hasattr(self, "net_g") == False or self.net_g == None:
-            print("[Voice Changer] No pyTorch session.", hasattr(self, "net_g"), self.net_g)
+            print(
+                "[Voice Changer] No pyTorch session.",
+                hasattr(self, "net_g"),
+                self.net_g,
+            )
            raise NoModeLoadedException("pytorch")

        if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled == False):
@@ -436,8 +520,23 @@ class RVC:
            f0_file = None

            embChannels = self.settings.modelSlots[self.currentSlot].embChannels
-            audio_out = vc.pipeline(self.hubert_model, self.net_g, sid, audio, times, f0_up_key, f0_method,
-                                    file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file, silence_front=self.settings.extraConvertSize / self.settings.modelSamplingRate, embChannels=embChannels)
+            audio_out = vc.pipeline(
+                self.hubert_model,
+                self.net_g,
+                sid,
+                audio,
+                times,
+                f0_up_key,
+                f0_method,
+                file_index,
+                file_big_npy,
+                index_rate,
+                if_f0,
+                f0_file=f0_file,
+                silence_front=self.settings.extraConvertSize
+                / self.settings.modelSamplingRate,
+                embChannels=embChannels,
+            )

            result = audio_out * np.sqrt(vol)
@@ -445,7 +544,11 @@ class RVC:
    def inference(self, data):
        if self.settings.modelSlotIndex < 0:
-            print("[Voice Changer] wait for loading model...", self.settings.modelSlotIndex, self.currentSlot)
+            print(
+                "[Voice Changer] wait for loading model...",
+                self.settings.modelSlotIndex,
+                self.currentSlot,
+            )
            raise NoModeLoadedException("model_common")

        if self.currentSlot != self.settings.modelSlotIndex:
@@ -482,7 +585,9 @@ class RVC:
            print("[Voice Changer] export2onnx, No pyTorch session.")
            return {"status": "ng", "path": f""}

-        pyTorchModelFile = self.settings.modelSlots[self.settings.modelSlotIndex].pyTorchModelFile  # inference前にexportできるようにcurrentSlotではなくslot
+        pyTorchModelFile = self.settings.modelSlots[
+            self.settings.modelSlotIndex
+        ].pyTorchModelFile  # inference前にexportできるようにcurrentSlotではなくslot

        if pyTorchModelFile == None:
            print("[Voice Changer] export2onnx, No pyTorch filepath.")
@@ -490,23 +595,45 @@ class RVC:
        import voice_changer.RVC.export2onnx as onnxExporter

        output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
-        output_file_simple = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
+        output_file_simple = (
+            os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
+        )
        output_path = os.path.join(TMP_DIR, output_file)
        output_path_simple = os.path.join(TMP_DIR, output_file_simple)
-        print("embChannels", self.settings.modelSlots[self.settings.modelSlotIndex].embChannels)
+        print(
+            "embChannels",
+            self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
+        )

        metadata = {
            "application": "VC_CLIENT",
            "version": "1",
-            "modelType": self.settings.modelSlots[self.settings.modelSlotIndex].modelType,
-            "samplingRate": self.settings.modelSlots[self.settings.modelSlotIndex].samplingRate,
+            "modelType": self.settings.modelSlots[
+                self.settings.modelSlotIndex
+            ].modelType,
+            "samplingRate": self.settings.modelSlots[
+                self.settings.modelSlotIndex
+            ].samplingRate,
            "f0": self.settings.modelSlots[self.settings.modelSlotIndex].f0,
-            "embChannels": self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
+            "embChannels": self.settings.modelSlots[
+                self.settings.modelSlotIndex
+            ].embChannels,
+            "embedder": self.settings.modelSlots[self.settings.modelSlotIndex].embedder,
        }

        if torch.cuda.device_count() > 0:
-            onnxExporter.export2onnx(pyTorchModelFile, output_path, output_path_simple, True, metadata)
+            onnxExporter.export2onnx(
+                pyTorchModelFile, output_path, output_path_simple, True, metadata
+            )
        else:
-            print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
-            onnxExporter.export2onnx(pyTorchModelFile, output_path, output_path_simple, False, metadata)
+            print(
+                "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
+            )
+            onnxExporter.export2onnx(
+                pyTorchModelFile, output_path, output_path_simple, False, metadata
+            )

-        return {"status": "ok", "path": f"/tmp/{output_file_simple}", "filename": output_file_simple}
+        return {
+            "status": "ok",
+            "path": f"/tmp/{output_file_simple}",
+            "filename": output_file_simple,
+        }

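One detail worth calling out in generate_input above: convertSize is padded up to the next multiple of 128 because the model's output hop size would otherwise truncate samples. The arithmetic, isolated (the function name is mine, for illustration only):

def round_up_to_hop(convert_size: int, hop: int = 128) -> int:
    # Pad to the next multiple of the hop size, exactly as generate_input does.
    if convert_size % hop != 0:
        convert_size += hop - (convert_size % hop)
    return convert_size


assert round_up_to_hop(4000) == 4096  # 4000 = 31 * 128 + 32, so 96 samples are added
assert round_up_to_hop(4096) == 4096  # already aligned, unchanged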
server/voice_changer/VoiceChanger.py

@ -9,14 +9,18 @@ import resampy
from voice_changer.IORecorder import IORecorder from voice_changer.IORecorder import IORecorder
# from voice_changer.IOAnalyzer import IOAnalyzer
from voice_changer.utils.Timer import Timer from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
import time
from Exceptions import NoModeLoadedException, ONNXInputArgumentException from Exceptions import NoModeLoadedException, ONNXInputArgumentException
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = [
"OpenVINOExecutionProvider",
"CUDAExecutionProvider",
"DmlExecutionProvider",
"CPUExecutionProvider",
]
STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav") STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav") STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
@ -25,7 +29,7 @@ STREAM_ANALYZE_FILE_HARVEST = os.path.join(TMP_DIR, "analyze-harvest.png")
@dataclass @dataclass
class VoiceChangerSettings(): class VoiceChangerSettings:
inputSampleRate: int = 48000 # 48000 or 24000 inputSampleRate: int = 48000 # 48000 or 24000
crossFadeOffsetRate: float = 0.1 crossFadeOffsetRate: float = 0.1
@ -41,16 +45,14 @@ class VoiceChangerSettings():
floatData: list[str] = field( floatData: list[str] = field(
default_factory=lambda: ["crossFadeOffsetRate", "crossFadeEndRate"] default_factory=lambda: ["crossFadeOffsetRate", "crossFadeEndRate"]
) )
strData: list[str] = field( strData: list[str] = field(default_factory=lambda: [])
default_factory=lambda: []
)
class VoiceChanger(): class VoiceChanger:
settings: VoiceChangerSettings settings: VoiceChangerSettings
voiceChanger: VoiceChangerModel voiceChanger: VoiceChangerModel
def __init__(self, params): def __init__(self, params: VoiceChangerParams):
# 初期化 # 初期化
self.settings = VoiceChangerSettings() self.settings = VoiceChangerSettings()
self.onnx_session = None self.onnx_session = None
@ -64,9 +66,14 @@ class VoiceChanger():
self.params = params self.params = params
self.gpu_num = torch.cuda.device_count() self.gpu_num = torch.cuda.device_count()
self.prev_audio = np.zeros(4096) self.prev_audio = np.zeros(4096)
self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available() self.mps_enabled: bool = (
getattr(torch.backends, "mps", None) is not None
and torch.backends.mps.is_available()
)
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})") print(
f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
)
def switchModelType(self, modelType: ModelType): def switchModelType(self, modelType: ModelType):
if hasattr(self, "voiceChanger") and self.voiceChanger != None: if hasattr(self, "voiceChanger") and self.voiceChanger != None:
@ -77,24 +84,31 @@ class VoiceChanger():
self.modelType = modelType self.modelType = modelType
if self.modelType == "MMVCv15": if self.modelType == "MMVCv15":
from voice_changer.MMVCv15.MMVCv15 import MMVCv15 from voice_changer.MMVCv15.MMVCv15 import MMVCv15
self.voiceChanger = MMVCv15() # type: ignore self.voiceChanger = MMVCv15() # type: ignore
elif self.modelType == "MMVCv13": elif self.modelType == "MMVCv13":
from voice_changer.MMVCv13.MMVCv13 import MMVCv13 from voice_changer.MMVCv13.MMVCv13 import MMVCv13
self.voiceChanger = MMVCv13() self.voiceChanger = MMVCv13()
elif self.modelType == "so-vits-svc-40v2": elif self.modelType == "so-vits-svc-40v2":
from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2 from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
self.voiceChanger = SoVitsSvc40v2(self.params) self.voiceChanger = SoVitsSvc40v2(self.params)
elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c": elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c":
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40 from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
self.voiceChanger = SoVitsSvc40(self.params) self.voiceChanger = SoVitsSvc40(self.params)
elif self.modelType == "DDSP-SVC": elif self.modelType == "DDSP-SVC":
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
self.voiceChanger = DDSP_SVC(self.params) self.voiceChanger = DDSP_SVC(self.params)
elif self.modelType == "RVC": elif self.modelType == "RVC":
from voice_changer.RVC.RVC import RVC from voice_changer.RVC.RVC import RVC
self.voiceChanger = RVC(self.params) self.voiceChanger = RVC(self.params)
else: else:
from voice_changer.MMVCv13.MMVCv13 import MMVCv13 from voice_changer.MMVCv13.MMVCv13 import MMVCv13
self.voiceChanger = MMVCv13() self.voiceChanger = MMVCv13()
return {"status": "OK", "msg": "vc is switched."} return {"status": "OK", "msg": "vc is switched."}
@ -109,7 +123,6 @@ class VoiceChanger():
self, self,
props, props,
): ):
try: try:
return self.voiceChanger.loadModel(props) return self.voiceChanger.loadModel(props)
except Exception as e: except Exception as e:
@ -143,7 +156,9 @@ class VoiceChanger():
if key == "recordIO" and val == 1: if key == "recordIO" and val == 1:
if hasattr(self, "ioRecorder"): if hasattr(self, "ioRecorder"):
self.ioRecorder.close() self.ioRecorder.close()
self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate) self.ioRecorder = IORecorder(
STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
)
if key == "recordIO" and val == 0: if key == "recordIO" and val == 0:
if hasattr(self, "ioRecorder"): if hasattr(self, "ioRecorder"):
self.ioRecorder.close() self.ioRecorder.close()
@ -174,12 +189,12 @@ class VoiceChanger():
return self.get_info() return self.get_info()
def _generate_strength(self, crossfadeSize: int): def _generate_strength(self, crossfadeSize: int):
if (
if self.crossfadeSize != crossfadeSize or \ self.crossfadeSize != crossfadeSize
self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \ or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \ or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize: or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
):
self.crossfadeSize = crossfadeSize self.crossfadeSize = crossfadeSize
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
@ -193,30 +208,54 @@ class VoiceChanger():
np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2 np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
np_cur_strength = np.cos((1 - percent) * 0.5 * np.pi) ** 2 np_cur_strength = np.cos((1 - percent) * 0.5 * np.pi) ** 2
self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, self.np_prev_strength = np.concatenate(
np.zeros(crossfadeSize - cf_offset - len(np_prev_strength))]) [
self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(crossfadeSize - cf_offset - len(np_cur_strength))]) np.ones(cf_offset),
np_prev_strength,
np.zeros(crossfadeSize - cf_offset - len(np_prev_strength)),
]
)
self.np_cur_strength = np.concatenate(
[
np.zeros(cf_offset),
np_cur_strength,
np.ones(crossfadeSize - cf_offset - len(np_cur_strength)),
]
)
print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}") print(
f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
)
# ひとつ前の結果とサイズが変わるため、記録は消去する。 # ひとつ前の結果とサイズが変わるため、記録は消去する。
if hasattr(self, 'np_prev_audio1') == True: if hasattr(self, "np_prev_audio1") == True:
delattr(self, "np_prev_audio1") delattr(self, "np_prev_audio1")
if hasattr(self, "sola_buffer"): if hasattr(self, "sola_buffer"):
del self.sola_buffer del self.sola_buffer
    # receivedData: tuple of short
    def on_request(
        self, receivedData: AudioInOut
    ) -> tuple[AudioInOut, list[Union[int, float]]]:
        return self.on_request_sola(receivedData)

    def on_request_sola(
        self, receivedData: AudioInOut
    ) -> tuple[AudioInOut, list[Union[int, float]]]:
        try:
            processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
            # Pre-processing
            with Timer("pre-process") as t:
                if self.settings.inputSampleRate != processing_sampling_rate:
                    newData = cast(
                        AudioInOut,
                        resampy.resample(
                            receivedData,
                            self.settings.inputSampleRate,
                            processing_sampling_rate,
                        ),
                    )
                else:
                    newData = receivedData
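# [Editor's note: not part of the commit] resampy.resample(x, sr_orig, sr_new)
# converts the client's capture rate to the model's processing rate here, e.g.
# resampy.resample(receivedData, 48000, 32000) for a 48 kHz client feeding a
# 32 kHz model (rates hypothetical); the mirror conversion back happens in the
# post-processing step below.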
@@ -226,7 +265,9 @@ class VoiceChanger():
                crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
                self._generate_strength(crossfade_frame)

                data = self.voiceChanger.generate_input(
                    newData, block_frame, crossfade_frame, sola_search_frame
                )
            preprocess_time = t.secs
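# ---------------------------------------------------------------------------
# [Editor's note: illustration only, not part of the commit] Timer comes from
# elsewhere in the package; a minimal stand-in matching the interface used
# here (`with Timer(label) as t:` then `t.secs`) could look like this
# (assumed, not the project's actual implementation):
#
#     import time
#
#     class Timer:
#         def __init__(self, label: str):
#             self.label = label
#
#         def __enter__(self):
#             self.start = time.perf_counter()
#             return self
#
#         def __exit__(self, *exc):
#             self.secs = time.perf_counter() - self.start
# ---------------------------------------------------------------------------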
            # Conversion
@@ -234,15 +275,28 @@ class VoiceChanger():
                # Inference
                audio = self.voiceChanger.inference(data)

                if hasattr(self, "sola_buffer"):
                    np.set_printoptions(threshold=10000)
                    audio = audio[-sola_search_frame - crossfade_frame - block_frame :]
                    # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
                    cor_nom = np.convolve(
                        audio[: crossfade_frame + sola_search_frame],
                        np.flip(self.sola_buffer),
                        "valid",
                    )
                    cor_den = np.sqrt(
                        np.convolve(
                            audio[: crossfade_frame + sola_search_frame] ** 2,
                            np.ones(crossfade_frame),
                            "valid",
                        )
                        + 1e-3
                    )
                    sola_offset = np.argmax(cor_nom / cor_den)
                    output_wav = audio[sola_offset : sola_offset + block_frame].astype(
                        np.float64
                    )
                    output_wav[:crossfade_frame] *= self.np_cur_strength
                    output_wav[:crossfade_frame] += self.sola_buffer[:]
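# ---------------------------------------------------------------------------
# [Editor's note: illustration only, not part of the commit] The SOLA search
# above slides the saved tail of the previous chunk (sola_buffer) along the
# head of the new chunk and keeps the shift with the highest normalized
# cross-correlation, so the crossfade splices at a phase-aligned point.
# A self-contained sketch with synthetic data (all sizes arbitrary):
#
#     import numpy as np
#
#     rng = np.random.default_rng(0)
#     crossfade_frame, sola_search_frame = 128, 32
#     audio = rng.standard_normal(512)  # stands in for the new chunk
#     sola_buffer = audio[7 : 7 + crossfade_frame]  # pretend previous tail
#
#     head = audio[: crossfade_frame + sola_search_frame]
#     cor_nom = np.convolve(head, np.flip(sola_buffer), "valid")
#     cor_den = np.sqrt(
#         np.convolve(head**2, np.ones(crossfade_frame), "valid") + 1e-3
#     )
#     sola_offset = int(np.argmax(cor_nom / cor_den))  # recovers the shift, 7
# ---------------------------------------------------------------------------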
@@ -251,8 +305,16 @@ class VoiceChanger():
                    print("[Voice Changer] no sola buffer. (You can ignore this.)")
                    result = np.zeros(4096).astype(np.int16)

                if hasattr(self, "sola_buffer") and sola_offset < sola_search_frame:
                    sola_buf_org = audio[
                        -sola_search_frame
                        - crossfade_frame
                        + sola_offset : -sola_search_frame
                        + sola_offset
                    ]
                    self.sola_buffer = sola_buf_org * self.np_prev_strength
                else:
                    self.sola_buffer = audio[-crossfade_frame:] * self.np_prev_strength
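# [Editor's note: not part of the commit] The tail saved here, already scaled
# by the fade-out curve np_prev_strength, becomes the sola_buffer the next
# call crossfades against; the sola_offset guard keeps the slice inside the
# current chunk.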
@@ -263,12 +325,20 @@ class VoiceChanger():
            with Timer("post-process") as t:
                result = result.astype(np.int16)
                if self.settings.inputSampleRate != processing_sampling_rate:
                    outputData = cast(
                        AudioInOut,
                        resampy.resample(
                            result,
                            processing_sampling_rate,
                            self.settings.inputSampleRate,
                        ).astype(np.int16),
                    )
                else:
                    outputData = result

                print_convert_processing(
                    f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
                )

                if self.settings.recordIO == 1:
                    self.ioRecorder.writeInput(receivedData)
@@ -281,7 +351,9 @@ class VoiceChanger():
                # # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
                postprocess_time = t.secs

            print_convert_processing(
                f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
            )
            perf = [preprocess_time, mainprocess_time, postprocess_time]
            return outputData, perf
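# [Editor's note: not part of the commit] perf returns the three Timer
# readings (pre-process, inference, post-process, in seconds) alongside the
# converted audio so the caller can report per-stage latency.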
@@ -299,8 +371,9 @@ class VoiceChanger():
    def export2onnx(self):
        return self.voiceChanger.export2onnx()


##############
PRINT_CONVERT_PROCESSING: bool = False
# PRINT_CONVERT_PROCESSING = True
@@ -318,5 +391,7 @@ def pad_array(arr: AudioInOut, target_length: int):
    pad_width = target_length - current_length
    pad_left = pad_width // 2
    pad_right = pad_width - pad_left
    padded_arr = np.pad(
        arr, (pad_left, pad_right), "constant", constant_values=(0, 0)
    )
    return padded_arr
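# [Editor's note: illustration only, not part of the commit] pad_array centers
# the signal, splitting the zero padding across both sides:
#
#     >>> pad_array(np.array([1, 2, 3], dtype=np.int16), 6)
#     array([0, 1, 2, 3, 0, 0], dtype=int16)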
View File
@@ -1,12 +1,16 @@
import numpy as np
from voice_changer.VoiceChanger import VoiceChanger
from const import ModelType
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams


class VoiceChangerManager(object):
    _instance = None
    voiceChanger: VoiceChanger = None

    @classmethod
    def get_instance(cls, params: VoiceChangerParams):
        if cls._instance is None:
            cls._instance = cls()
            cls._instance.voiceChanger = VoiceChanger(params)
        return cls._instance
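# [Editor's note: not part of the commit] The refactor swaps the old
# hasattr(cls, "_instance") probe for an explicit class attribute: _instance
# is now always defined (initially None), so "is None" reliably detects first
# use before the singleton is lazily constructed.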
@@ -20,7 +24,7 @@ class VoiceChangerManager():
        return info

    def get_info(self):
        if hasattr(self, "voiceChanger"):
            info = self.voiceChanger.get_info()
            info["status"] = "OK"
            return info
@@ -28,7 +32,7 @@ class VoiceChangerManager():
            return {"status": "ERROR", "msg": "no model loaded"}

    def update_settings(self, key: str, val: any):
        if hasattr(self, "voiceChanger"):
            info = self.voiceChanger.update_settings(key, val)
            info["status"] = "OK"
            return info
@@ -36,7 +40,7 @@ class VoiceChangerManager():
            return {"status": "ERROR", "msg": "no model loaded"}

    def changeVoice(self, receivedData: any):
        if hasattr(self, "voiceChanger"):
            return self.voiceChanger.on_request(receivedData)
        else:
            print("Voice Change is not loaded. Did you load a correct model?")
View File
@@ -0,0 +1,11 @@
from dataclasses import dataclass


@dataclass
class VoiceChangerParams():
    content_vec_500: str
    content_vec_500_onnx: str
    content_vec_500_onnx_on: bool
    hubert_base: str
    hubert_soft: str
    nsf_hifigan: str
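# ---------------------------------------------------------------------------
# [Editor's note: illustration only, not part of the commit] The dataclass
# bundles the model paths collected by the server's argument parser into one
# typed object that VoiceChangerManager.get_instance() consumes; hypothetical
# wiring, with `args` coming from argparse:
#
#     voiceChangerParams = VoiceChangerParams(
#         content_vec_500=args.content_vec_500,
#         content_vec_500_onnx=args.content_vec_500_onnx,
#         content_vec_500_onnx_on=args.content_vec_500_onnx_on,
#         hubert_base=args.hubert_base,
#         hubert_soft=args.hubert_soft,
#         nsf_hifigan=args.nsf_hifigan,
#     )
#     voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
# ---------------------------------------------------------------------------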