Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-01-23 13:35:12 +03:00)

refactoring

Commit 55118815b4 (parent: a59631609c)
server/.vscode/settings.json (new file, vendored, +16)
@@ -0,0 +1,16 @@
{
    "workbench.colorCustomizations": {
        "tab.activeBackground": "#65952acc"
    },
    "python.formatting.provider": "black",
    "python.linting.mypyEnabled": true,
    "[python]": {
        "editor.defaultFormatter": null, // do not use Prettier
        "editor.formatOnSave": true // format automatically on save
    },
    "flake8.args": [
        "--ignore=E501"
        // "--max-line-length=150",
        // "--max-complexity=20"
    ]
}
server/MMVCServerSIO.py

@@ -2,12 +2,12 @@ import sys

from distutils.util import strtobool
from datetime import datetime
from dataclasses import dataclass
import misc.log_control
import socket
import platform
import os
import argparse
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams

import uvicorn
from mods.ssl import create_self_signed_cert
from voice_changer.VoiceChangerManager import VoiceChangerManager
@@ -21,30 +21,48 @@ import multiprocessing as mp
def setupArgParser():
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", type=int, default=18888, help="port")
    parser.add_argument("--https", type=strtobool,
                        default=False, help="use https")
    parser.add_argument("--httpsKey", type=str,
                        default="ssl.key", help="path for the key of https")
    parser.add_argument("--httpsCert", type=str,
                        default="ssl.cert", help="path for the cert of https")
    parser.add_argument("--httpsSelfSigned", type=strtobool,
                        default=True, help="generate self-signed certificate")
    parser.add_argument("--https", type=strtobool, default=False, help="use https")
    parser.add_argument(
        "--httpsKey", type=str, default="ssl.key", help="path for the key of https"
    )
    parser.add_argument(
        "--httpsCert", type=str, default="ssl.cert", help="path for the cert of https"
    )
    parser.add_argument(
        "--httpsSelfSigned",
        type=strtobool,
        default=True,
        help="generate self-signed certificate",
    )

    # parser.add_argument("--internal", type=strtobool, default=False, help="convert various paths to ones inside the mac app")

    parser.add_argument("--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)")
    parser.add_argument("--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)")
    parser.add_argument("--content_vec_500_onnx_on", type=strtobool, default=False, help="use or not onnx for content_vec_500")
    parser.add_argument("--hubert_base", type=str, help="path to hubert_base model(pytorch)")
    parser.add_argument("--hubert_soft", type=str, help="path to hubert_soft model(pytorch)")
    parser.add_argument("--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)")
    parser.add_argument(
        "--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)"
    )
    parser.add_argument(
        "--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)"
    )
    parser.add_argument(
        "--content_vec_500_onnx_on",
        type=strtobool,
        default=False,
        help="use or not onnx for content_vec_500",
    )
    parser.add_argument(
        "--hubert_base", type=str, help="path to hubert_base model(pytorch)"
    )
    parser.add_argument(
        "--hubert_soft", type=str, help="path to hubert_soft model(pytorch)"
    )
    parser.add_argument(
        "--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)"
    )

    return parser


def printMessage(message, level=0):
    pf = platform.system()
    if pf == 'Windows':
    if pf == "Windows":
        if level == 0:
            print(f"{message}")
        elif level == 1:
@@ -78,37 +96,38 @@ def localServer():
        host="0.0.0.0",
        port=int(PORT),
        reload=False if hasattr(sys, "_MEIPASS") else True,
        log_level="warning"
        log_level="warning",
    )


if __name__ == 'MMVCServerSIO':
    voiceChangerManager = VoiceChangerManager.get_instance({
        "content_vec_500": args.content_vec_500,
        "content_vec_500_onnx": args.content_vec_500_onnx,
        "content_vec_500_onnx_on": args.content_vec_500_onnx_on,
        "hubert_base": args.hubert_base,
        "hubert_soft": args.hubert_soft,
        "nsf_hifigan": args.nsf_hifigan,
    })
if __name__ == "MMVCServerSIO":
    voiceChangerParams = VoiceChangerParams(
        content_vec_500=args.content_vec_500,
        content_vec_500_onnx=args.content_vec_500_onnx,
        content_vec_500_onnx_on=args.content_vec_500_onnx_on,
        hubert_base=args.hubert_base,
        hubert_soft=args.hubert_soft,
        nsf_hifigan=args.nsf_hifigan,
    )
    voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
    print("voiceChangerManager", voiceChangerManager)

    app_fastapi = MMVC_Rest.get_instance(voiceChangerManager)
    app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager)


if __name__ == '__mp_main__':
    printMessage(f"サーバプロセスを起動しています。", level=2)
if __name__ == "__mp_main__":
    printMessage("サーバプロセスを起動しています。", level=2)

if __name__ == '__main__':
if __name__ == "__main__":
    mp.freeze_support()

    printMessage(f"Voice Changerを起動しています。", level=2)
    printMessage("Voice Changerを起動しています。", level=2)
    PORT = args.p

    if os.getenv("EX_PORT"):
        EX_PORT = os.environ["EX_PORT"]
        printMessage(
            f"External_Port:{EX_PORT} Internal_Port:{PORT}", level=1)
        printMessage(f"External_Port:{EX_PORT} Internal_Port:{PORT}", level=1)
    else:
        printMessage(f"Internal_Port:{PORT}", level=1)
@@ -123,38 +142,42 @@ if __name__ == '__main__':
        key_base_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        keyname = f"{key_base_name}.key"
        certname = f"{key_base_name}.cert"
        create_self_signed_cert(certname, keyname, certargs={"Country": "JP",
        create_self_signed_cert(
            certname,
            keyname,
            certargs={
                "Country": "JP",
                "State": "Tokyo",
                "City": "Chuo-ku",
                "Organization": "F",
                "Org. Unit": "F"}, cert_dir=SSL_KEY_DIR)
                "Org. Unit": "F",
            },
            cert_dir=SSL_KEY_DIR,
        )
        key_path = os.path.join(SSL_KEY_DIR, keyname)
        cert_path = os.path.join(SSL_KEY_DIR, certname)
        printMessage(
            f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
            f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1
        )

    elif args.https and args.httpsSelfSigned == 0:
        # HTTPS
        key_path = args.httpsKey
        cert_path = args.httpsCert
        printMessage(
            f"protocol: HTTPS, key:{key_path}, cert:{cert_path}", level=1)
        printMessage(f"protocol: HTTPS, key:{key_path}, cert:{cert_path}", level=1)
    else:
        # HTTP
        printMessage(f"protocol: HTTP", level=1)
        printMessage(f"-- ---- -- ", level=1)
        printMessage("protocol: HTTP", level=1)
        printMessage("-- ---- -- ", level=1)

    # display the access URL
    printMessage(
        f"ブラウザで次のURLを開いてください.", level=2)
    printMessage("ブラウザで次のURLを開いてください.", level=2)
    if args.https == 1:
        printMessage(
            f"https://<IP>:<PORT>/", level=1)
        printMessage("https://<IP>:<PORT>/", level=1)
    else:
        printMessage(
            f"http://<IP>:<PORT>/", level=1)
        printMessage("http://<IP>:<PORT>/", level=1)

    printMessage(f"多くの場合は次のいずれかのURLにアクセスすると起動します。", level=2)
    printMessage("多くの場合は次のいずれかのURLにアクセスすると起動します。", level=2)
    if "EX_PORT" in locals() and "EX_IP" in locals():  # launched via shell script (docker)
        if args.https == 1:
            printMessage(f"https://localhost:{EX_PORT}/", level=1)
@@ -175,7 +198,7 @@ if __name__ == '__main__':
        # start the server
        if args.https:
            # start the HTTPS server
            res = uvicorn.run(
            uvicorn.run(
                f"{os.path.basename(__file__)[:-3]}:app_socketio",
                host="0.0.0.0",
                port=int(PORT),
@@ -188,13 +211,17 @@ if __name__ == '__main__':
        p = mp.Process(name="p", target=localServer)
        p.start()
        try:
            if sys.platform.startswith('win'):
                process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"])
            if sys.platform.startswith("win"):
                process = subprocess.Popen(
                    [NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"]
                )
                return_code = process.wait()
                print("client closed.")
                p.terminate()
            elif sys.platform.startswith('darwin'):
                process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"])
            elif sys.platform.startswith("darwin"):
                process = subprocess.Popen(
                    [NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"]
                )
                return_code = process.wait()
                print("client closed.")
                p.terminate()
server/const.py

@@ -26,14 +26,6 @@ TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else
os.makedirs(TMP_DIR, exist_ok=True)


# modelType: ModelType = "MMVCv15"
# def getModelType() -> ModelType:
#     return modelType
# def setModelType(_modelType: ModelType):
#     global modelType
#     modelType = _modelType


def getFrontendPath():
    frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
    return frontend_path
server/restapi/MMVC_Rest.py

@@ -1,7 +1,8 @@
from fastapi import FastAPI, Request, Response
from fastapi import FastAPI, Request, Response, HTTPException
from fastapi.routing import APIRoute
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.exceptions import RequestValidationError
from typing import Callable
from voice_changer.VoiceChangerManager import VoiceChangerManager

@@ -18,7 +19,7 @@ class ValidationErrorLoggingRoute(APIRoute):
        async def custom_route_handler(request: Request) -> Response:
            try:
                return await original_route_handler(request)
            except Exception as exc:
            except RequestValidationError as exc:
                print("Exception", request.url, str(exc))
                body = await request.body()
                detail = {"errors": exc.errors(), "body": body.decode()}
@@ -28,10 +29,11 @@ class ValidationErrorLoggingRoute(APIRoute):


class MMVC_Rest:
    _instance = None

    @classmethod
    def get_instance(cls, voiceChangerManager: VoiceChangerManager):
        if not hasattr(cls, "_instance"):
        if cls._instance is None:
            app_fastapi = FastAPI()
            app_fastapi.router.route_class = ValidationErrorLoggingRoute
            app_fastapi.add_middleware(
@@ -43,15 +45,25 @@ class MMVC_Rest:
            )

            app_fastapi.mount(
                "/front", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
                "/front",
                StaticFiles(directory=f"{getFrontendPath()}", html=True),
                name="static",
            )

            app_fastapi.mount(
                "/trainer", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
                "/trainer",
                StaticFiles(directory=f"{getFrontendPath()}", html=True),
                name="static",
            )

            app_fastapi.mount(
                "/recorder", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
                "/recorder",
                StaticFiles(directory=f"{getFrontendPath()}", html=True),
                name="static",
            )
            app_fastapi.mount(
                "/tmp", StaticFiles(directory=f'{TMP_DIR}'), name="static")
                "/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static"
            )

            restHello = MMVC_Rest_Hello()
            app_fastapi.include_router(restHello.router)
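The get_instance change above is a genuine bug fix rather than formatting: once "_instance = None" exists as a class attribute, hasattr(cls, "_instance") is always True, so a guard written with hasattr can never run its body. A minimal standalone sketch of the corrected pattern (illustrative, not code from this repository):

class Singleton:
    _instance = None  # the class attribute exists from the moment the class is defined

    @classmethod
    def get_instance(cls):
        # hasattr(cls, "_instance") would already be True here,
        # so test the attribute's value instead of its existence
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance


assert Singleton.get_instance() is Singleton.get_instance()

The same hasattr-to-"is None" correction is applied to VoiceChangerManager.get_instance later in this commit.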
server/restapi/MMVC_Rest_Fileuploader.py

@@ -4,12 +4,13 @@ from typing import Union
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from fastapi import HTTPException, FastAPI, UploadFile, File, Form
from fastapi import UploadFile, File, Form

from restapi.mods.FileUploader import upload_file, concat_file_chunks
from voice_changer.VoiceChangerManager import VoiceChangerManager

from const import MODEL_DIR, UPLOAD_DIR, ModelType

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

@@ -19,12 +20,22 @@ class MMVC_Rest_Fileuploader:
        self.voiceChangerManager = voiceChangerManager
        self.router = APIRouter()
        self.router.add_api_route("/info", self.get_info, methods=["GET"])
        self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
        self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
        self.router.add_api_route("/update_settings", self.post_update_settings, methods=["POST"])
        self.router.add_api_route(
            "/upload_file", self.post_upload_file, methods=["POST"]
        )
        self.router.add_api_route(
            "/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"]
        )
        self.router.add_api_route(
            "/update_settings", self.post_update_settings, methods=["POST"]
        )
        self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
        self.router.add_api_route("/load_model_for_train", self.post_load_model_for_train, methods=["POST"])
        self.router.add_api_route("/extract_voices", self.post_extract_voices, methods=["POST"])
        self.router.add_api_route(
            "/load_model_for_train", self.post_load_model_for_train, methods=["POST"]
        )
        self.router.add_api_route(
            "/extract_voices", self.post_extract_voices, methods=["POST"]
        )
        self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
        self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
        self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
@@ -34,9 +45,13 @@ class MMVC_Rest_Fileuploader:
        json_compatible_item_data = jsonable_encoder(res)
        return JSONResponse(content=json_compatible_item_data)

    def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
    def post_concat_uploaded_file(
        self, filename: str = Form(...), filenameChunkNum: int = Form(...)
    ):
        slot = 0
        res = concat_file_chunks(slot, UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
        res = concat_file_chunks(
            slot, UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR
        )
        json_compatible_item_data = jsonable_encoder(res)
        return JSONResponse(content=json_compatible_item_data)

@@ -45,7 +60,9 @@ class MMVC_Rest_Fileuploader:
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)

    def post_update_settings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
    def post_update_settings(
        self, key: str = Form(...), val: Union[int, str, float] = Form(...)
    ):
        print("post_update_settings", key, val)
        info = self.voiceChangerManager.update_settings(key, val)
        json_compatible_item_data = jsonable_encoder(info)
@@ -63,7 +80,6 @@ class MMVC_Rest_Fileuploader:
        isHalf: bool = Form(...),
        params: str = Form(...),
    ):

        props = {
            "slot": slot,
            "isHalf": isHalf,
@@ -73,9 +89,9 @@ class MMVC_Rest_Fileuploader:
                "onnxModelFilename": onnxModelFilename,
                "clusterTorchModelFilename": clusterTorchModelFilename,
                "featureFilename": featureFilename,
                "indexFilename": indexFilename
                "indexFilename": indexFilename,
            },
            "params": params
            "params": params,
        }
        # Change Filepath
        for key, val in props["files"].items():
@@ -103,9 +119,11 @@ class MMVC_Rest_Fileuploader:
        modelDFilenameChunkNum: int = Form(...),
    ):
        modelGFilePath = concat_file_chunks(
            UPLOAD_DIR, modelGFilename, modelGFilenameChunkNum, MODEL_DIR)
            UPLOAD_DIR, modelGFilename, modelGFilenameChunkNum, MODEL_DIR
        )
        modelDFilePath = concat_file_chunks(
            UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR)
            UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR
        )
        return {"File saved": f"{modelGFilePath}, {modelDFilePath}"}

    def post_extract_voices(
@@ -114,7 +132,8 @@ class MMVC_Rest_Fileuploader:
        zipFileChunkNum: int = Form(...),
    ):
        zipFilePath = concat_file_chunks(
            UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR)
            UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR
        )
        shutil.unpack_archive(zipFilePath, "MMVC_Trainer/dataset/textful/")
        return {"Zip file unpacked": f"{zipFilePath}"}
server/restapi/MMVC_Rest_Hello.py

@@ -1,6 +1,6 @@
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse


class MMVC_Rest_Hello:
    def __init__(self):
        self.router = APIRouter()
@@ -8,6 +8,3 @@ class MMVC_Rest_Hello:

    def hello(self):
        return {"result": "Index"}
server/restapi/MMVC_Rest_VoiceChanger.py

@@ -31,24 +31,24 @@ class MMVC_Rest_VoiceChanger:
        buffer = voice.buffer
        wav = base64.b64decode(buffer)

        if wav == 0:
            samplerate, data = read("dummy.wav")
            unpackedData = data
        else:
            unpackedData = np.array(struct.unpack(
                '<%sh' % (len(wav) // struct.calcsize('<h')), wav))
            # write("logs/received_data.wav", 24000,
            #       unpackedData.astype(np.int16))
        # if wav == 0:
        #     samplerate, data = read("dummy.wav")
        #     unpackedData = data
        # else:
        #     unpackedData = np.array(
        #         struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav)
        #     )

        unpackedData = np.array(
            struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav)
        )

        self.tlock.acquire()
        changedVoice = self.voiceChangerManager.changeVoice(unpackedData)
        self.tlock.release()

        changedVoiceBase64 = base64.b64encode(changedVoice[0]).decode('utf-8')
        data = {
            "timestamp": timestamp,
            "changedVoiceBase64": changedVoiceBase64
        }
        changedVoiceBase64 = base64.b64encode(changedVoice[0]).decode("utf-8")
        data = {"timestamp": timestamp, "changedVoiceBase64": changedVoiceBase64}

        json_compatible_item_data = jsonable_encoder(data)
        return JSONResponse(content=json_compatible_item_data)
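The kept decode path above turns a base64 payload of little-endian 16-bit PCM into a NumPy array via struct.unpack. A self-contained sketch of that round trip (the function names are illustrative, not from the repository):

import base64
import struct

import numpy as np


def decode_pcm16(payload: str) -> np.ndarray:
    wav = base64.b64decode(payload)
    # "<h" is one little-endian int16; unpack the whole buffer in one call
    count = len(wav) // struct.calcsize("<h")
    return np.array(struct.unpack("<%sh" % count, wav))


def encode_pcm16(samples: np.ndarray) -> str:
    return base64.b64encode(samples.astype(np.int16).tobytes()).decode("utf-8")


print(decode_pcm16(encode_pcm16(np.array([0, 1000, -1000]))))  # [    0  1000 -1000]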
server/voice_changer/IOAnalyzer.py (deleted)

@@ -1,40 +0,0 @@
import os
import numpy as np
import pylab
import librosa
import librosa.display
import pyworld as pw


class IOAnalyzer:

    def _get_f0_dio(self, y, sr):
        _f0, time = pw.dio(y, sr, frame_period=5)
        f0 = pw.stonemask(y, _f0, time, sr)
        time = np.linspace(0, y.shape[0] / sr, len(time))
        return f0, time

    def _get_f0_harvest(self, y, sr):
        _f0, time = pw.harvest(y, sr, frame_period=5)
        f0 = pw.stonemask(y, _f0, time, sr)
        time = np.linspace(0, y.shape[0] / sr, len(time))
        return f0, time

    def analyze(self, inputDataFile: str, dioImageFile: str, harvestImageFile: str, samplingRate: int):
        y, sr = librosa.load(inputDataFile, samplingRate)
        y = y.astype(np.float64)
        spec = librosa.amplitude_to_db(np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=128)), ref=np.max)
        f0_dio, times = self._get_f0_dio(y, sr=samplingRate)
        f0_harvest, times = self._get_f0_harvest(y, sr=samplingRate)

        pylab.close()
        HOP_LENGTH = 128
        img = librosa.display.specshow(spec, sr=samplingRate, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
        pylab.plot(times, f0_dio, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
        pylab.savefig(dioImageFile)

        pylab.close()
        HOP_LENGTH = 128
        img = librosa.display.specshow(spec, sr=samplingRate, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
        pylab.plot(times, f0_harvest, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
        pylab.savefig(harvestImageFile)
server/voice_changer/RVC/ModelWrapper.py

@@ -30,13 +30,15 @@ class ModelWrapper:
            self.embChannels = metadata["embChannels"]
            self.modelType = metadata["modelType"]
            self.deprecated = False
            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}")
            self.embedder = metadata["embedder"] if "embedder" in metadata else "hubert_base"
            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}, embedder:{self.embedder}")
        except:
            self.samplingRate = 48000
            self.f0 = True
            self.embChannels = 256
            self.modelType = 0
            self.deprecated = True
            self.embedder = "hubert_base"
            print(f"[Voice Changer] ############## !!!! CAUTION !!!! ####################")
            print(f"[Voice Changer] This onnx's version is depricated. Please regenerate onnxfile. Fallback to default")
            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}")
@@ -57,6 +59,9 @@ class ModelWrapper:
    def getDeprecated(self):
        return self.deprecated

    def getEmbedder(self):
        return self.embedder

    def set_providers(self, providers, provider_options=[{}]):
        self.onnx_session.set_providers(providers=providers, provider_options=provider_options)
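ModelWrapper populates samplingRate, f0, embChannels and now embedder from custom metadata stored in the ONNX file, falling back to defaults (and flagging the model deprecated) when the metadata is missing. A hedged sketch of reading such metadata with onnxruntime — the "metadata" key and the JSON layout are assumptions for illustration, not confirmed from ModelWrapper's source:

import json

import onnxruntime


def read_custom_metadata(model_path: str) -> dict:
    session = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])
    # custom_metadata_map holds the key/value pairs written at export time
    meta = session.get_modelmeta().custom_metadata_map
    try:
        return json.loads(meta["metadata"])  # hypothetical key name
    except (KeyError, json.JSONDecodeError):
        # old export without usable metadata: fall back to defaults
        return {"samplingRate": 48000, "f0": True, "embChannels": 256, "embedder": "hubert_base"}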
server/voice_changer/RVC/RVC.py

@@ -4,11 +4,12 @@ import json
import resampy
from voice_changer.RVC.ModelWrapper import ModelWrapper
from Exceptions import NoModeLoadedException
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams

# avoiding parse arg error in RVC
sys.argv = ["MMVCServerSIO.py"]

if sys.platform.startswith('darwin'):
if sys.platform.startswith("darwin"):
    baseDir = [x for x in sys.path if x.endswith("Contents/MacOS")]
    if len(baseDir) != 1:
        print("baseDir should be only one ", baseDir)
@@ -24,6 +25,7 @@ from functools import reduce
import numpy as np
import torch
import onnxruntime

# onnxruntime.set_default_logger_severity(3)
from const import HUBERT_ONNX_MODEL_PATH, TMP_DIR

@@ -36,11 +38,17 @@ from .models import SynthesizerTrnMsNSFsidNono as SynthesizerTrnMsNSFsidNono_web

from .const import RVC_MODEL_TYPE_RVC, RVC_MODEL_TYPE_WEBUI
from fairseq import checkpoint_utils

providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]

providers = [
    "OpenVINOExecutionProvider",
    "CUDAExecutionProvider",
    "DmlExecutionProvider",
    "CPUExecutionProvider",
]

@dataclass
class ModelSlot():
class ModelSlot:
    pyTorchModelFile: str = ""
    onnxModelFile: str = ""
    featureFile: str = ""
@@ -51,13 +59,11 @@ class ModelSlot():
    f0: bool = True
    embChannels: int = 256
    deprecated: bool = False
    # samplingRateOnnx: int = -1
    # f0Onnx: bool = True
    # embChannelsOnnx: int = 256
    embedder: str = "hubert_base"  # "hubert_base", "contentvec", "distilhubert"


@dataclass
class RVCSettings():
class RVCSettings:
    gpu: int = 0
    dstId: int = 0

@@ -72,9 +78,7 @@ class RVCSettings():
    onnxModelFile: str = ""
    configFile: str = ""
    modelSlots: list[ModelSlot] = field(
        default_factory=lambda: [
            ModelSlot(), ModelSlot(), ModelSlot()
        ]
        default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot()]
    )
    indexRatio: float = 0
    rvcQuality: int = 0
@@ -82,23 +86,28 @@ class RVCSettings():
    modelSamplingRate: int = 48000
    modelSlotIndex: int = -1

    speakers: dict[str, int] = field(
        default_factory=lambda: {}
    )
    speakers: dict[str, int] = field(default_factory=lambda: {})

    # only the mutable fields are listed below
    intData = ["gpu", "dstId", "tran", "extraConvertSize", "rvcQuality", "modelSamplingRate", "silenceFront", "modelSlotIndex"]
    intData = [
        "gpu",
        "dstId",
        "tran",
        "extraConvertSize",
        "rvcQuality",
        "modelSamplingRate",
        "silenceFront",
        "modelSlotIndex",
    ]
    floatData = ["silentThreshold", "indexRatio"]
    strData = ["framework", "f0Detector"]

class RVC:
    def __init__(self, params):
    def __init__(self, params: VoiceChangerParams):
        self.initialLoad = True
        self.settings = RVCSettings()

        self.inferenceing: bool = False

        self.net_g = None
        self.onnx_session = None
        self.feature_file = None
@@ -108,7 +117,10 @@ class RVC:
        self.prevVol = 0
        self.params = params

        self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
        self.mps_enabled: bool = (
            getattr(torch.backends, "mps", None) is not None
            and torch.backends.mps.is_available()
        )
        self.currentSlot = -1
        print("RVC initialization: ", params)
        print("mps: ", self.mps_enabled)
@@ -120,26 +132,41 @@ class RVC:
        params = json.loads(params_str)

        newSlot = asdict(self.settings.modelSlots[tmp_slot])
        newSlot.update({
        newSlot.update(
            {
                "pyTorchModelFile": props["files"]["pyTorchModelFilename"],
                "onnxModelFile": props["files"]["onnxModelFilename"],
                "featureFile": props["files"]["featureFilename"],
                "indexFile": props["files"]["indexFilename"],
                "defaultTrans": params["trans"]
        })
                "defaultTrans": params["trans"],
            }
        )
        self.settings.modelSlots[tmp_slot] = ModelSlot(**newSlot)

        print("[Voice Changer] RVC loading... slot:", tmp_slot)

        # Load metadata
        if self.settings.modelSlots[tmp_slot].pyTorchModelFile != None and self.settings.modelSlots[tmp_slot].pyTorchModelFile != "":
            self._setInfoByPytorch(tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile)
        if self.settings.modelSlots[tmp_slot].onnxModelFile != None and self.settings.modelSlots[tmp_slot].onnxModelFile != "":
            self._setInfoByONNX(tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile)
        if (
            self.settings.modelSlots[tmp_slot].pyTorchModelFile != None
            and self.settings.modelSlots[tmp_slot].pyTorchModelFile != ""
        ):
            self._setInfoByPytorch(
                tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile
            )
        if (
            self.settings.modelSlots[tmp_slot].onnxModelFile != None
            and self.settings.modelSlots[tmp_slot].onnxModelFile != ""
        ):
            self._setInfoByONNX(
                tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile
            )

        try:
            hubert_path = self.params["hubert_base"]
            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
            hubert_path = self.params.hubert_base
            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
                [hubert_path],
                suffix="",
            )
            model = models[0]
            model.eval()
            if self.is_half:
@@ -164,13 +191,21 @@ class RVC:
        if config_len == 18:
            self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_RVC
            self.settings.modelSlots[slot].embChannels = 256
            self.settings.modelSlots[slot].embedder = "hubert_base"
        else:
            self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_WEBUI
            self.settings.modelSlots[slot].embChannels = cpt["config"][17]
            self.settings.modelSlots[slot].embedder = cpt["embedder_name"]
            if self.settings.modelSlots[slot].embedder.endswith("768"):
                self.settings.modelSlots[slot].embedder = self.settings.modelSlots[
                    slot
                ].embedder[:-3]
            print("embedder....", self.settings.modelSlots[slot].embedder)

        self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
        self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]

        self.settings.modelSamplingRate = cpt["config"][-1]
        # self.settings.modelSamplingRate = cpt["config"][-1]

    def _setInfoByONNX(self, slot, file):
        tmp_onnx_session = ModelWrapper(file)
@@ -179,6 +214,8 @@ class RVC:
        self.settings.modelSlots[slot].f0 = tmp_onnx_session.getF0()
        self.settings.modelSlots[slot].samplingRate = tmp_onnx_session.getSamplingRate()
        self.settings.modelSlots[slot].deprecated = tmp_onnx_session.getDeprecated()
        self.settings.modelSlots[slot].embedder = tmp_onnx_session.getEmbedder()
        print("embedder....", self.settings.modelSlots[slot].embedder)

    def prepareModel(self, slot: int):
        print("[Voice Changer] Prepare Model of slot:", slot)
@@ -188,7 +225,7 @@ class RVC:
        if pyTorchModelFile != None and pyTorchModelFile != "":
            print("[Voice Changer] Loading Pytorch Model...")
            cpt = torch.load(pyTorchModelFile, map_location="cpu")
            '''
            """
            (1) Telling original RVC models from rvc-webui models: by the overall shape of config
            - normal 256
            [1025, 32, 192, 192, 768, 2, 6, 3, 0, '1', [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 6, 2, 2, 2], 512, [16, 16, 4, 4, 4], 109, 256, 48000]
@@ -200,32 +237,32 @@ class RVC:
            0: pitchless, 1: normal

            (2-2) For rvc-webui, (256 or 768) x (normal or pitchless): 256 vs 768 is decided by the 17th element; normal vs pitchless by ckp["f0"]
            '''
            # config_len = len(cpt["config"])
            # if config_len == 18:
            #     self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_RVC
            #     self.settings.modelSlots[slot].embChannels = 256
            # else:
            #     self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_WEBUI
            #     self.settings.modelSlots[slot].embChannels = cpt["config"][17]
            # self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
            # self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]
            """

            # self.settings.modelSamplingRate = cpt["config"][-1]

            if self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC and self.settings.modelSlots[slot].f0 == True:
            if (
                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
                and self.settings.modelSlots[slot].f0 == True
            ):
                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC and self.settings.modelSlots[slot].f0 == False:
            elif (
                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
                and self.settings.modelSlots[slot].f0 == False
            ):
                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI and self.settings.modelSlots[slot].f0 == True:
                net_g = SynthesizerTrnMsNSFsid_webui(**cpt["params"], is_half=self.is_half)
            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI and self.settings.modelSlots[slot].f0 == False:
                ######################
                # TBD
                ######################
                print("webui non-f0 is not supported yet")
                net_g = SynthesizerTrnMsNSFsidNono_webui(**cpt["params"], is_half=self.is_half)

            elif (
                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
                and self.settings.modelSlots[slot].f0 == True
            ):
                net_g = SynthesizerTrnMsNSFsid_webui(
                    **cpt["params"], is_half=self.is_half
                )
            elif (
                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
                and self.settings.modelSlots[slot].f0 == False
            ):
                net_g = SynthesizerTrnMsNSFsidNono_webui(
                    **cpt["params"], is_half=self.is_half
                )
            else:
                print("unknwon")
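The (translated) docstring above gives the detection rules that _setInfoByPytorch implements: an original-RVC checkpoint carries an 18-element config with the sampling rate last, an rvc-webui checkpoint grows the config and stores the embedding width at index 17, and pitched vs. pitchless is cpt["f0"]. A compact sketch of that decision table (a paraphrase for illustration, not a drop-in replacement):

RVC_MODEL_TYPE_RVC = 0    # constant values assumed for the sketch
RVC_MODEL_TYPE_WEBUI = 1


def classify_checkpoint(cpt: dict) -> dict:
    if len(cpt["config"]) == 18:
        model_type, emb_channels = RVC_MODEL_TYPE_RVC, 256
    else:
        model_type, emb_channels = RVC_MODEL_TYPE_WEBUI, cpt["config"][17]
    return {
        "modelType": model_type,
        "embChannels": emb_channels,
        "f0": cpt["f0"] == 1,  # 1: normal, 0: pitchless
        "samplingRate": cpt["config"][-1],
    }


# a normal-256 original checkpoint:
print(classify_checkpoint({"config": [1025] + [0] * 16 + [48000], "f0": 1}))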
@@ -259,11 +296,15 @@ class RVC:
        self.next_trans = self.settings.modelSlots[slot].defaultTrans
        self.next_samplingRate = self.settings.modelSlots[slot].samplingRate
        self.next_framework = "ONNX" if self.next_onnx_session != None else "PyTorch"
        print("[Voice Changer] Prepare done.",)
        print(
            "[Voice Changer] Prepare done.",
        )
        return self.get_info()

    def switchModel(self):
        print("[Voice Changer] Switching model..",)
        print(
            "[Voice Changer] Switching model..",
        )
        # del self.net_g
        # del self.onnx_session
        self.net_g = self.next_net_g
@@ -275,17 +316,23 @@ class RVC:
        self.settings.modelSamplingRate = self.next_samplingRate
        self.next_net_g = None
        self.next_onnx_session = None
        print("[Voice Changer] Switching model..done",)
        print(
            "[Voice Changer] Switching model..done",
        )

    def update_settings(self, key: str, val: any):
        if key == "onnxExecutionProvider" and self.onnx_session != None:
            if val == "CUDAExecutionProvider":
                if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
                    self.settings.gpu = 0
                provider_options = [{'device_id': self.settings.gpu}]
                self.onnx_session.set_providers(providers=[val], provider_options=provider_options)
                provider_options = [{"device_id": self.settings.gpu}]
                self.onnx_session.set_providers(
                    providers=[val], provider_options=provider_options
                )
                if hasattr(self, "hubert_onnx"):
                    self.hubert_onnx.set_providers(providers=[val], provider_options=provider_options)
                    self.hubert_onnx.set_providers(
                        providers=[val], provider_options=provider_options
                    )
            else:
                self.onnx_session.set_providers(providers=[val])
                if hasattr(self, "hubert_onnx"):
@@ -294,12 +341,20 @@ class RVC:
            print("Onnx is not enabled. Please load model.")
            return False
        elif key in self.settings.intData:
            if key == "gpu" and val >= 0 and val < self.gpu_num and self.onnx_session != None:
            if (
                key == "gpu"
                and val >= 0
                and val < self.gpu_num
                and self.onnx_session != None
            ):
                providers = self.onnx_session.get_providers()
                print("Providers:", providers)
                if "CUDAExecutionProvider" in providers:
                    provider_options = [{'device_id': self.settings.gpu}]
                    self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
                    provider_options = [{"device_id": self.settings.gpu}]
                    self.onnx_session.set_providers(
                        providers=["CUDAExecutionProvider"],
                        provider_options=provider_options,
                    )
            if key == "modelSlotIndex":
                # self.switchModel(int(val))
                val = int(val) % 1000  # Quick hack for same slot is selected
@@ -318,7 +373,9 @@ class RVC:
    def get_info(self):
        data = asdict(self.settings)

        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session != None else []
        data["onnxExecutionProviders"] = (
            self.onnx_session.get_providers() if self.onnx_session != None else []
        )
        files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
        for f in files:
            if data[f] != None and os.path.exists(data[f]):
@@ -331,22 +388,30 @@ class RVC:
    def get_processing_sampling_rate(self):
        return self.settings.modelSamplingRate

    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
    def generate_input(
        self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0
    ):
        newData = newData.astype(np.float32) / 32768.0

        if hasattr(self, "audio_buffer"):
            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # append to the past data
            self.audio_buffer = np.concatenate(
                [self.audio_buffer, newData], 0
            )  # append to the past data
        else:
            self.audio_buffer = newData

        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
        convertSize = (
            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
        )

        if convertSize % 128 != 0:  # pad, because the model's output hop size would otherwise truncate
            convertSize = convertSize + (128 - (convertSize % 128))

        self.audio_buffer = self.audio_buffer[-1 * convertSize :]  # keep only the part to be converted

        crop = self.audio_buffer[-1 * (inputSize + crossfadeSize):-1 * (crossfadeSize)]  # cut out the output part to check its volume (relation to SOLA not yet considered)
        crop = self.audio_buffer[
            -1 * (inputSize + crossfadeSize) : -1 * (crossfadeSize)
        ]  # cut out the output part to check its volume (relation to SOLA not yet considered)
        rms = np.sqrt(np.square(crop).mean(axis=0))
        vol = max(rms, self.prevVol * 0.0)
        self.prevVol = vol
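generate_input keeps a rolling buffer of past input and rounds the conversion window up to a multiple of 128 samples so the model's output hop size never truncates the tail. The rounding is the standard round-up-to-multiple idiom:

def align_to_hop(convert_size: int, hop: int = 128) -> int:
    # round up to the next multiple of the hop size
    if convert_size % hop != 0:
        convert_size += hop - (convert_size % hop)
    return convert_size


assert align_to_hop(4096) == 4096
assert align_to_hop(4100) == 4224  # next multiple of 128 above 4100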
@@ -390,15 +455,34 @@ class RVC:

        f0 = self.settings.modelSlots[self.currentSlot].f0
        embChannels = self.settings.modelSlots[self.currentSlot].embChannels
        audio_out = vc.pipeline(self.hubert_model, self.onnx_session, sid, audio, times, f0_up_key, f0_method,
                                file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file, silence_front=self.settings.extraConvertSize / self.settings.modelSamplingRate, embChannels=embChannels)
        audio_out = vc.pipeline(
            self.hubert_model,
            self.onnx_session,
            sid,
            audio,
            times,
            f0_up_key,
            f0_method,
            file_index,
            file_big_npy,
            index_rate,
            if_f0,
            f0_file=f0_file,
            silence_front=self.settings.extraConvertSize
            / self.settings.modelSamplingRate,
            embChannels=embChannels,
        )
        result = audio_out * np.sqrt(vol)

        return result

    def _pyTorch_inference(self, data):
        if hasattr(self, "net_g") == False or self.net_g == None:
            print("[Voice Changer] No pyTorch session.", hasattr(self, "net_g"), self.net_g)
            print(
                "[Voice Changer] No pyTorch session.",
                hasattr(self, "net_g"),
                self.net_g,
            )
            raise NoModeLoadedException("pytorch")

        if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled == False):
@@ -436,8 +520,23 @@ class RVC:
            f0_file = None

        embChannels = self.settings.modelSlots[self.currentSlot].embChannels
        audio_out = vc.pipeline(self.hubert_model, self.net_g, sid, audio, times, f0_up_key, f0_method,
                                file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file, silence_front=self.settings.extraConvertSize / self.settings.modelSamplingRate, embChannels=embChannels)
        audio_out = vc.pipeline(
            self.hubert_model,
            self.net_g,
            sid,
            audio,
            times,
            f0_up_key,
            f0_method,
            file_index,
            file_big_npy,
            index_rate,
            if_f0,
            f0_file=f0_file,
            silence_front=self.settings.extraConvertSize
            / self.settings.modelSamplingRate,
            embChannels=embChannels,
        )

        result = audio_out * np.sqrt(vol)

@@ -445,7 +544,11 @@ class RVC:

    def inference(self, data):
        if self.settings.modelSlotIndex < 0:
            print("[Voice Changer] wait for loading model...", self.settings.modelSlotIndex, self.currentSlot)
            print(
                "[Voice Changer] wait for loading model...",
                self.settings.modelSlotIndex,
                self.currentSlot,
            )
            raise NoModeLoadedException("model_common")

        if self.currentSlot != self.settings.modelSlotIndex:
@@ -482,7 +585,9 @@ class RVC:
            print("[Voice Changer] export2onnx, No pyTorch session.")
            return {"status": "ng", "path": f""}

        pyTorchModelFile = self.settings.modelSlots[self.settings.modelSlotIndex].pyTorchModelFile  # use the selected slot, not currentSlot, so export works before inference
        pyTorchModelFile = self.settings.modelSlots[
            self.settings.modelSlotIndex
        ].pyTorchModelFile  # use the selected slot, not currentSlot, so export works before inference

        if pyTorchModelFile == None:
            print("[Voice Changer] export2onnx, No pyTorch filepath.")
@@ -490,23 +595,45 @@ class RVC:
        import voice_changer.RVC.export2onnx as onnxExporter

        output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
        output_file_simple = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
        output_file_simple = (
            os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
        )
        output_path = os.path.join(TMP_DIR, output_file)
        output_path_simple = os.path.join(TMP_DIR, output_file_simple)
        print("embChannels", self.settings.modelSlots[self.settings.modelSlotIndex].embChannels)
        print(
            "embChannels",
            self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
        )
        metadata = {
            "application": "VC_CLIENT",
            "version": "1",
            "modelType": self.settings.modelSlots[self.settings.modelSlotIndex].modelType,
            "samplingRate": self.settings.modelSlots[self.settings.modelSlotIndex].samplingRate,
            "modelType": self.settings.modelSlots[
                self.settings.modelSlotIndex
            ].modelType,
            "samplingRate": self.settings.modelSlots[
                self.settings.modelSlotIndex
            ].samplingRate,
            "f0": self.settings.modelSlots[self.settings.modelSlotIndex].f0,
            "embChannels": self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
            "embChannels": self.settings.modelSlots[
                self.settings.modelSlotIndex
            ].embChannels,
            "embedder": self.settings.modelSlots[self.settings.modelSlotIndex].embedder,
        }

        if torch.cuda.device_count() > 0:
            onnxExporter.export2onnx(pyTorchModelFile, output_path, output_path_simple, True, metadata)
            onnxExporter.export2onnx(
                pyTorchModelFile, output_path, output_path_simple, True, metadata
            )
        else:
            print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
            onnxExporter.export2onnx(pyTorchModelFile, output_path, output_path_simple, False, metadata)
            print(
                "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
            )
            onnxExporter.export2onnx(
                pyTorchModelFile, output_path, output_path_simple, False, metadata
            )

        return {"status": "ok", "path": f"/tmp/{output_file_simple}", "filename": output_file_simple}
        return {
            "status": "ok",
            "path": f"/tmp/{output_file_simple}",
            "filename": output_file_simple,
        }
server/voice_changer/VoiceChanger.py

@@ -9,14 +9,18 @@ import resampy


from voice_changer.IORecorder import IORecorder

# from voice_changer.IOAnalyzer import IOAnalyzer

from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
import time
from Exceptions import NoModeLoadedException, ONNXInputArgumentException
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams

providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
providers = [
    "OpenVINOExecutionProvider",
    "CUDAExecutionProvider",
    "DmlExecutionProvider",
    "CPUExecutionProvider",
]

STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
@@ -25,7 +29,7 @@ STREAM_ANALYZE_FILE_HARVEST = os.path.join(TMP_DIR, "analyze-harvest.png")


@dataclass
class VoiceChangerSettings():
class VoiceChangerSettings:
    inputSampleRate: int = 48000  # 48000 or 24000

    crossFadeOffsetRate: float = 0.1
@@ -41,16 +45,14 @@ class VoiceChangerSettings():
    floatData: list[str] = field(
        default_factory=lambda: ["crossFadeOffsetRate", "crossFadeEndRate"]
    )
    strData: list[str] = field(
        default_factory=lambda: []
    )
    strData: list[str] = field(default_factory=lambda: [])


class VoiceChanger():
class VoiceChanger:
    settings: VoiceChangerSettings
    voiceChanger: VoiceChangerModel

    def __init__(self, params):
    def __init__(self, params: VoiceChangerParams):
        # initialization
        self.settings = VoiceChangerSettings()
        self.onnx_session = None
@@ -64,9 +66,14 @@ class VoiceChanger():
        self.params = params
        self.gpu_num = torch.cuda.device_count()
        self.prev_audio = np.zeros(4096)
        self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
        self.mps_enabled: bool = (
            getattr(torch.backends, "mps", None) is not None
            and torch.backends.mps.is_available()
        )

        print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
        print(
            f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
        )

    def switchModelType(self, modelType: ModelType):
        if hasattr(self, "voiceChanger") and self.voiceChanger != None:
@@ -77,24 +84,31 @@ class VoiceChanger():
        self.modelType = modelType
        if self.modelType == "MMVCv15":
            from voice_changer.MMVCv15.MMVCv15 import MMVCv15

            self.voiceChanger = MMVCv15()  # type: ignore
        elif self.modelType == "MMVCv13":
            from voice_changer.MMVCv13.MMVCv13 import MMVCv13

            self.voiceChanger = MMVCv13()
        elif self.modelType == "so-vits-svc-40v2":
            from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2

            self.voiceChanger = SoVitsSvc40v2(self.params)
        elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c":
            from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40

            self.voiceChanger = SoVitsSvc40(self.params)
        elif self.modelType == "DDSP-SVC":
            from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC

            self.voiceChanger = DDSP_SVC(self.params)
        elif self.modelType == "RVC":
            from voice_changer.RVC.RVC import RVC

            self.voiceChanger = RVC(self.params)
        else:
            from voice_changer.MMVCv13.MMVCv13 import MMVCv13

            self.voiceChanger = MMVCv13()

        return {"status": "OK", "msg": "vc is switched."}
@@ -109,7 +123,6 @@ class VoiceChanger():
        self,
        props,
    ):

        try:
            return self.voiceChanger.loadModel(props)
        except Exception as e:
@@ -143,7 +156,9 @@ class VoiceChanger():
        if key == "recordIO" and val == 1:
            if hasattr(self, "ioRecorder"):
                self.ioRecorder.close()
            self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
            self.ioRecorder = IORecorder(
                STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
            )
        if key == "recordIO" and val == 0:
            if hasattr(self, "ioRecorder"):
                self.ioRecorder.close()
@@ -174,12 +189,12 @@ class VoiceChanger():
        return self.get_info()

    def _generate_strength(self, crossfadeSize: int):

        if self.crossfadeSize != crossfadeSize or \
                self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
                self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
                self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:

        if (
            self.crossfadeSize != crossfadeSize
            or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
            or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
            or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
        ):
            self.crossfadeSize = crossfadeSize
            self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
            self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
@@ -193,30 +208,54 @@ class VoiceChanger():
            np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
            np_cur_strength = np.cos((1 - percent) * 0.5 * np.pi) ** 2

            self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength,
                                                    np.zeros(crossfadeSize - cf_offset - len(np_prev_strength))])
            self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(crossfadeSize - cf_offset - len(np_cur_strength))])
            self.np_prev_strength = np.concatenate(
                [
                    np.ones(cf_offset),
                    np_prev_strength,
                    np.zeros(crossfadeSize - cf_offset - len(np_prev_strength)),
                ]
            )
            self.np_cur_strength = np.concatenate(
                [
                    np.zeros(cf_offset),
                    np_cur_strength,
                    np.ones(crossfadeSize - cf_offset - len(np_cur_strength)),
                ]
            )

            print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")
            print(
                f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
            )

            # the sizes no longer match the previous result, so clear the recorded buffers
            if hasattr(self, 'np_prev_audio1') == True:
            if hasattr(self, "np_prev_audio1") == True:
                delattr(self, "np_prev_audio1")
            if hasattr(self, "sola_buffer"):
                del self.sola_buffer

    # receivedData: tuple of short
    def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
    def on_request(
        self, receivedData: AudioInOut
    ) -> tuple[AudioInOut, list[Union[int, float]]]:
        return self.on_request_sola(receivedData)

    def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
    def on_request_sola(
        self, receivedData: AudioInOut
    ) -> tuple[AudioInOut, list[Union[int, float]]]:
        try:
            processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()

            # pre-processing
            with Timer("pre-process") as t:
                if self.settings.inputSampleRate != processing_sampling_rate:
                    newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
                    newData = cast(
                        AudioInOut,
                        resampy.resample(
                            receivedData,
                            self.settings.inputSampleRate,
                            processing_sampling_rate,
                        ),
                    )
                else:
                    newData = receivedData

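_generate_strength above builds complementary cos-squared fade curves: because cos²(x) + cos²(π/2 − x) = 1, the outgoing (prev) and incoming (cur) weights always sum to one across the overlap, and each curve is padded with ones/zeros outside the fade region. A standalone sketch of the same construction (the offset/end computation is an assumption matching the crossFadeOffsetRate/crossFadeEndRate settings):

import numpy as np


def crossfade_curves(crossfade_size: int, offset_rate: float = 0.1, end_rate: float = 0.9):
    cf_offset = int(crossfade_size * offset_rate)
    cf_end = int(crossfade_size * end_rate)
    percent = np.arange(cf_end - cf_offset) / (cf_end - cf_offset)
    prev = np.cos(percent * 0.5 * np.pi) ** 2        # fades 1 -> 0
    cur = np.cos((1 - percent) * 0.5 * np.pi) ** 2   # fades 0 -> 1
    prev_full = np.concatenate([np.ones(cf_offset), prev, np.zeros(crossfade_size - cf_end)])
    cur_full = np.concatenate([np.zeros(cf_offset), cur, np.ones(crossfade_size - cf_end)])
    return prev_full, cur_full


prev_full, cur_full = crossfade_curves(1024)
assert np.allclose(prev_full + cur_full, 1.0)  # weights always sum to one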
@@ -226,7 +265,9 @@ class VoiceChanger():
                crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
                self._generate_strength(crossfade_frame)

                data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
                data = self.voiceChanger.generate_input(
                    newData, block_frame, crossfade_frame, sola_search_frame
                )
            preprocess_time = t.secs

            # conversion
@@ -234,15 +275,28 @@ class VoiceChanger():
                # Inference
                audio = self.voiceChanger.inference(data)

                if hasattr(self, 'sola_buffer') == True:
                if hasattr(self, "sola_buffer") == True:
                    np.set_printoptions(threshold=10000)
                    audio = audio[-sola_search_frame - crossfade_frame - block_frame :]
                    # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
                    cor_nom = np.convolve(audio[: crossfade_frame + sola_search_frame], np.flip(self.sola_buffer), 'valid')
                    cor_den = np.sqrt(np.convolve(audio[: crossfade_frame + sola_search_frame] ** 2, np.ones(crossfade_frame), 'valid') + 1e-3)
                    cor_nom = np.convolve(
                        audio[: crossfade_frame + sola_search_frame],
                        np.flip(self.sola_buffer),
                        "valid",
                    )
                    cor_den = np.sqrt(
                        np.convolve(
                            audio[: crossfade_frame + sola_search_frame] ** 2,
                            np.ones(crossfade_frame),
                            "valid",
                        )
                        + 1e-3
                    )
                    sola_offset = np.argmax(cor_nom / cor_den)

                    output_wav = audio[sola_offset: sola_offset + block_frame].astype(np.float64)
                    output_wav = audio[sola_offset : sola_offset + block_frame].astype(
                        np.float64
                    )
                    output_wav[:crossfade_frame] *= self.np_cur_strength
                    output_wav[:crossfade_frame] += self.sola_buffer[:]

@@ -251,8 +305,16 @@ class VoiceChanger():
                    print("[Voice Changer] no sola buffer. (You can ignore this.)")
                    result = np.zeros(4096).astype(np.int16)

                if hasattr(self, 'sola_buffer') == True and sola_offset < sola_search_frame:
                    sola_buf_org = audio[- sola_search_frame - crossfade_frame + sola_offset: -sola_search_frame + sola_offset]
                if (
                    hasattr(self, "sola_buffer") == True
                    and sola_offset < sola_search_frame
                ):
                    sola_buf_org = audio[
                        -sola_search_frame
                        - crossfade_frame
                        + sola_offset : -sola_search_frame
                        + sola_offset
                    ]
                    self.sola_buffer = sola_buf_org * self.np_prev_strength
                else:
                    self.sola_buffer = audio[-crossfade_frame:] * self.np_prev_strength
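The SOLA (synchronous overlap-add) block above searches, over sola_search_frame candidate shifts, for the offset where the freshly inferred audio best matches the tail saved from the previous block; np.convolve against the flipped buffer is a sliding cross-correlation, normalized by the local energy. A minimal sketch of the search in isolation:

import numpy as np


def sola_offset(audio: np.ndarray, sola_buffer: np.ndarray, search_frames: int) -> int:
    """Return the shift in [0, search_frames] that best aligns audio with sola_buffer."""
    crossfade = len(sola_buffer)
    head = audio[: crossfade + search_frames]
    # convolution with the flipped buffer == sliding correlation
    cor_nom = np.convolve(head, np.flip(sola_buffer), "valid")
    cor_den = np.sqrt(np.convolve(head ** 2, np.ones(crossfade), "valid") + 1e-3)
    return int(np.argmax(cor_nom / cor_den))


rng = np.random.default_rng(0)
prev_tail = rng.standard_normal(128)
audio = np.concatenate([rng.standard_normal(37), prev_tail, rng.standard_normal(512)])
print(sola_offset(audio, prev_tail, 64))  # prints 37, where the buffers line up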
@@ -263,12 +325,20 @@ class VoiceChanger():
            with Timer("post-process") as t:
                result = result.astype(np.int16)
                if self.settings.inputSampleRate != processing_sampling_rate:
                    outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
                    outputData = cast(
                        AudioInOut,
                        resampy.resample(
                            result,
                            processing_sampling_rate,
                            self.settings.inputSampleRate,
                        ).astype(np.int16),
                    )
                else:
                    outputData = result

                print_convert_processing(
                    f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
                    f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
                )

                if self.settings.recordIO == 1:
                    self.ioRecorder.writeInput(receivedData)
@@ -281,7 +351,9 @@ class VoiceChanger():
                # # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
            postprocess_time = t.secs

            print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
            print_convert_processing(
                f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
            )
            perf = [preprocess_time, mainprocess_time, postprocess_time]
            return outputData, perf

@@ -299,8 +371,9 @@ class VoiceChanger():
    def export2onnx(self):
        return self.voiceChanger.export2onnx()


##############

PRINT_CONVERT_PROCESSING: bool = False
# PRINT_CONVERT_PROCESSING = True

@@ -318,5 +391,7 @@ def pad_array(arr: AudioInOut, target_length: int):
    pad_width = target_length - current_length
    pad_left = pad_width // 2
    pad_right = pad_width - pad_left
    padded_arr = np.pad(arr, (pad_left, pad_right), 'constant', constant_values=(0, 0))
    padded_arr = np.pad(
        arr, (pad_left, pad_right), "constant", constant_values=(0, 0)
    )
    return padded_arr
server/voice_changer/VoiceChangerManager.py

@@ -1,12 +1,16 @@
import numpy as np
from voice_changer.VoiceChanger import VoiceChanger
from const import ModelType
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams


class VoiceChangerManager():
class VoiceChangerManager(object):
    _instance = None
    voiceChanger: VoiceChanger = None

    @classmethod
    def get_instance(cls, params):
        if not hasattr(cls, "_instance"):
    def get_instance(cls, params: VoiceChangerParams):
        if cls._instance is None:
            cls._instance = cls()
            cls._instance.voiceChanger = VoiceChanger(params)
        return cls._instance
@@ -20,7 +24,7 @@ class VoiceChangerManager():
        return info

    def get_info(self):
        if hasattr(self, 'voiceChanger'):
        if hasattr(self, "voiceChanger"):
            info = self.voiceChanger.get_info()
            info["status"] = "OK"
            return info
@@ -28,7 +32,7 @@ class VoiceChangerManager():
        return {"status": "ERROR", "msg": "no model loaded"}

    def update_settings(self, key: str, val: any):
        if hasattr(self, 'voiceChanger'):
        if hasattr(self, "voiceChanger"):
            info = self.voiceChanger.update_settings(key, val)
            info["status"] = "OK"
            return info
@@ -36,7 +40,7 @@ class VoiceChangerManager():
        return {"status": "ERROR", "msg": "no model loaded"}

    def changeVoice(self, receivedData: any):
        if hasattr(self, 'voiceChanger') == True:
        if hasattr(self, "voiceChanger") is True:
            return self.voiceChanger.on_request(receivedData)
        else:
            print("Voice Change is not loaded. Did you load a correct model?")
server/voice_changer/utils/VoiceChangerParams.py (new file, +11)
@@ -0,0 +1,11 @@
from dataclasses import dataclass


@dataclass
class VoiceChangerParams():
    content_vec_500: str
    content_vec_500_onnx: str
    content_vec_500_onnx_on: bool
    hubert_base: str
    hubert_soft: str
    nsf_hifigan: str
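MMVCServerSIO.py now fills this dataclass directly from the parsed command-line arguments and passes it to VoiceChangerManager.get_instance, replacing the untyped dict used before. Constructed in isolation it looks like this (the paths are illustrative placeholders):

params = VoiceChangerParams(
    content_vec_500="weights/checkpoint_best_legacy_500.pt",
    content_vec_500_onnx="weights/content_vec_500.onnx",
    content_vec_500_onnx_on=False,
    hubert_base="weights/hubert_base.pt",
    hubert_soft="weights/hubert_soft.pt",
    nsf_hifigan="weights/nsf_hifigan.pt",
)
print(params.hubert_base)  # fields are plain typed attributes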