mirror of https://github.com/w-okada/voice-changer.git
synced 2025-01-23 21:45:00 +03:00

refactoring

This commit is contained in:
parent a59631609c
commit 55118815b4
server/.vscode/settings.json (vendored, new file, 16 lines)
@@ -0,0 +1,16 @@
+{
+    "workbench.colorCustomizations": {
+        "tab.activeBackground": "#65952acc"
+    },
+    "python.formatting.provider": "black",
+    "python.linting.mypyEnabled": true,
+    "[python]": {
+        "editor.defaultFormatter": null, // do not use Prettier
+        "editor.formatOnSave": true // format automatically on save
+    },
+    "flake8.args": [
+        "--ignore=E501"
+        // "--max-line-length=150",
+        // "--max-complexity=20"
+    ]
+}
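Note: these settings make VS Code run black on every save and relax flake8's
line-length rule (E501), which would otherwise conflict with black's own
wrapping. A minimal sketch of the effect using black's Python API (the sample
source line is taken from the diff below; black's default width of 88 columns
is assumed):

    import black

    src = 'parser.add_argument("--httpsKey", type=str, default="ssl.key", help="path for the key of https")\n'
    # format_str applies the same reflow that format-on-save performs,
    # producing the multi-line call style seen throughout this commit.
    print(black.format_str(src, mode=black.Mode()))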

server/MMVCServerSIO.py

@@ -2,12 +2,12 @@ import sys
 
 from distutils.util import strtobool
 from datetime import datetime
-from dataclasses import dataclass
-import misc.log_control
 import socket
 import platform
 import os
 import argparse
+from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 
 import uvicorn
 from mods.ssl import create_self_signed_cert
 from voice_changer.VoiceChangerManager import VoiceChangerManager
@@ -21,30 +21,48 @@ import multiprocessing as mp
 def setupArgParser():
     parser = argparse.ArgumentParser()
     parser.add_argument("-p", type=int, default=18888, help="port")
-    parser.add_argument("--https", type=strtobool,
-                        default=False, help="use https")
-    parser.add_argument("--httpsKey", type=str,
-                        default="ssl.key", help="path for the key of https")
-    parser.add_argument("--httpsCert", type=str,
-                        default="ssl.cert", help="path for the cert of https")
-    parser.add_argument("--httpsSelfSigned", type=strtobool,
-                        default=True, help="generate self-signed certificate")
+    parser.add_argument("--https", type=strtobool, default=False, help="use https")
+    parser.add_argument(
+        "--httpsKey", type=str, default="ssl.key", help="path for the key of https"
+    )
+    parser.add_argument(
+        "--httpsCert", type=str, default="ssl.cert", help="path for the cert of https"
+    )
+    parser.add_argument(
+        "--httpsSelfSigned",
+        type=strtobool,
+        default=True,
+        help="generate self-signed certificate",
+    )
 
-    # parser.add_argument("--internal", type=strtobool, default=False, help="各種パスをmac appの中身に変換")
-    parser.add_argument("--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)")
-    parser.add_argument("--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)")
-    parser.add_argument("--content_vec_500_onnx_on", type=strtobool, default=False, help="use or not onnx for content_vec_500")
-    parser.add_argument("--hubert_base", type=str, help="path to hubert_base model(pytorch)")
-    parser.add_argument("--hubert_soft", type=str, help="path to hubert_soft model(pytorch)")
-    parser.add_argument("--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)")
+    parser.add_argument(
+        "--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)"
+    )
+    parser.add_argument(
+        "--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)"
+    )
+    parser.add_argument(
+        "--content_vec_500_onnx_on",
+        type=strtobool,
+        default=False,
+        help="use or not onnx for content_vec_500",
+    )
+    parser.add_argument(
+        "--hubert_base", type=str, help="path to hubert_base model(pytorch)"
+    )
+    parser.add_argument(
+        "--hubert_soft", type=str, help="path to hubert_soft model(pytorch)"
+    )
+    parser.add_argument(
+        "--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)"
+    )
 
     return parser
 
 
 def printMessage(message, level=0):
     pf = platform.system()
-    if pf == 'Windows':
+    if pf == "Windows":
         if level == 0:
             print(f"{message}")
         elif level == 1:
@@ -78,37 +96,38 @@ def localServer():
         host="0.0.0.0",
         port=int(PORT),
         reload=False if hasattr(sys, "_MEIPASS") else True,
-        log_level="warning"
+        log_level="warning",
     )
 
 
-if __name__ == 'MMVCServerSIO':
-    voiceChangerManager = VoiceChangerManager.get_instance({
-        "content_vec_500": args.content_vec_500,
-        "content_vec_500_onnx": args.content_vec_500_onnx,
-        "content_vec_500_onnx_on": args.content_vec_500_onnx_on,
-        "hubert_base": args.hubert_base,
-        "hubert_soft": args.hubert_soft,
-        "nsf_hifigan": args.nsf_hifigan,
-    })
+if __name__ == "MMVCServerSIO":
+    voiceChangerParams = VoiceChangerParams(
+        content_vec_500=args.content_vec_500,
+        content_vec_500_onnx=args.content_vec_500_onnx,
+        content_vec_500_onnx_on=args.content_vec_500_onnx_on,
+        hubert_base=args.hubert_base,
+        hubert_soft=args.hubert_soft,
+        nsf_hifigan=args.nsf_hifigan,
+    )
+    voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
+    print("voiceChangerManager", voiceChangerManager)
+
     app_fastapi = MMVC_Rest.get_instance(voiceChangerManager)
     app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager)
 
 
-if __name__ == '__mp_main__':
-    printMessage(f"サーバプロセスを起動しています。", level=2)
+if __name__ == "__mp_main__":
+    printMessage("サーバプロセスを起動しています。", level=2)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     mp.freeze_support()
 
-    printMessage(f"Voice Changerを起動しています。", level=2)
+    printMessage("Voice Changerを起動しています。", level=2)
     PORT = args.p
 
     if os.getenv("EX_PORT"):
        EX_PORT = os.environ["EX_PORT"]
-        printMessage(
-            f"External_Port:{EX_PORT} Internal_Port:{PORT}", level=1)
+        printMessage(f"External_Port:{EX_PORT} Internal_Port:{PORT}", level=1)
     else:
         printMessage(f"Internal_Port:{PORT}", level=1)
 
@@ -123,38 +142,42 @@ if __name__ == '__main__':
         key_base_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}"
         keyname = f"{key_base_name}.key"
         certname = f"{key_base_name}.cert"
-        create_self_signed_cert(certname, keyname, certargs={"Country": "JP",
-                                                             "State": "Tokyo",
-                                                             "City": "Chuo-ku",
-                                                             "Organization": "F",
-                                                             "Org. Unit": "F"}, cert_dir=SSL_KEY_DIR)
+        create_self_signed_cert(
+            certname,
+            keyname,
+            certargs={
+                "Country": "JP",
+                "State": "Tokyo",
+                "City": "Chuo-ku",
+                "Organization": "F",
+                "Org. Unit": "F",
+            },
+            cert_dir=SSL_KEY_DIR,
+        )
         key_path = os.path.join(SSL_KEY_DIR, keyname)
         cert_path = os.path.join(SSL_KEY_DIR, certname)
         printMessage(
-            f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
+            f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1
+        )
 
     elif args.https and args.httpsSelfSigned == 0:
         # HTTPS
         key_path = args.httpsKey
         cert_path = args.httpsCert
-        printMessage(
-            f"protocol: HTTPS, key:{key_path}, cert:{cert_path}", level=1)
+        printMessage(f"protocol: HTTPS, key:{key_path}, cert:{cert_path}", level=1)
     else:
         # HTTP
-        printMessage(f"protocol: HTTP", level=1)
-    printMessage(f"-- ---- -- ", level=1)
+        printMessage("protocol: HTTP", level=1)
+    printMessage("-- ---- -- ", level=1)
 
     # show the address
-    printMessage(
-        f"ブラウザで次のURLを開いてください.", level=2)
+    printMessage("ブラウザで次のURLを開いてください.", level=2)
     if args.https == 1:
-        printMessage(
-            f"https://<IP>:<PORT>/", level=1)
+        printMessage("https://<IP>:<PORT>/", level=1)
     else:
-        printMessage(
-            f"http://<IP>:<PORT>/", level=1)
+        printMessage("http://<IP>:<PORT>/", level=1)
 
-    printMessage(f"多くの場合は次のいずれかのURLにアクセスすると起動します。", level=2)
+    printMessage("多くの場合は次のいずれかのURLにアクセスすると起動します。", level=2)
     if "EX_PORT" in locals() and "EX_IP" in locals():  # launched via shell script (docker)
         if args.https == 1:
             printMessage(f"https://localhost:{EX_PORT}/", level=1)
@@ -175,7 +198,7 @@ if __name__ == '__main__':
     # start the server
     if args.https:
         # start the HTTPS server
-        res = uvicorn.run(
+        uvicorn.run(
             f"{os.path.basename(__file__)[:-3]}:app_socketio",
             host="0.0.0.0",
             port=int(PORT),
@@ -188,13 +211,17 @@ if __name__ == '__main__':
         p = mp.Process(name="p", target=localServer)
         p.start()
         try:
-            if sys.platform.startswith('win'):
-                process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"])
+            if sys.platform.startswith("win"):
+                process = subprocess.Popen(
+                    [NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"]
+                )
                 return_code = process.wait()
                 print("client closed.")
                 p.terminate()
-            elif sys.platform.startswith('darwin'):
-                process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"])
+            elif sys.platform.startswith("darwin"):
+                process = subprocess.Popen(
+                    [NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"]
+                )
                 return_code = process.wait()
                 print("client closed.")
                 p.terminate()
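Note: the hunks above replace the raw settings dict with a typed
VoiceChangerParams object (imported from voice_changer/utils/VoiceChangerParams.py,
which is not shown in this commit). A minimal sketch consistent with the field
names used here and with the attribute access (self.params.hubert_base) seen
later in RVC.py; the real definition may differ:

    from dataclasses import dataclass


    @dataclass
    class VoiceChangerParams:
        content_vec_500: str
        content_vec_500_onnx: str
        content_vec_500_onnx_on: bool
        hubert_base: str
        hubert_soft: str
        nsf_hifigan: str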

server/const.py

@@ -26,14 +26,6 @@ TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else
 os.makedirs(TMP_DIR, exist_ok=True)
 
 
-# modelType: ModelType = "MMVCv15"
-# def getModelType() -> ModelType:
-#     return modelType
-# def setModelType(_modelType: ModelType):
-#     global modelType
-#     modelType = _modelType
-
-
 def getFrontendPath():
     frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
     return frontend_path

server/restapi/MMVC_Rest.py

@@ -1,7 +1,8 @@
-from fastapi import FastAPI, Request, Response
+from fastapi import FastAPI, Request, Response, HTTPException
 from fastapi.routing import APIRoute
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
+from fastapi.exceptions import RequestValidationError
 from typing import Callable
 from voice_changer.VoiceChangerManager import VoiceChangerManager
 
@@ -18,7 +19,7 @@ class ValidationErrorLoggingRoute(APIRoute):
         async def custom_route_handler(request: Request) -> Response:
             try:
                 return await original_route_handler(request)
-            except Exception as exc:
+            except RequestValidationError as exc:
                 print("Exception", request.url, str(exc))
                 body = await request.body()
                 detail = {"errors": exc.errors(), "body": body.decode()}
@@ -28,10 +29,11 @@ class ValidationErrorLoggingRoute(APIRoute):
 
 
 class MMVC_Rest:
+    _instance = None
+
     @classmethod
     def get_instance(cls, voiceChangerManager: VoiceChangerManager):
-        if not hasattr(cls, "_instance"):
+        if cls._instance is None:
             app_fastapi = FastAPI()
             app_fastapi.router.route_class = ValidationErrorLoggingRoute
             app_fastapi.add_middleware(
@@ -43,15 +45,25 @@ class MMVC_Rest:
         )
 
         app_fastapi.mount(
-            "/front", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
+            "/front",
+            StaticFiles(directory=f"{getFrontendPath()}", html=True),
+            name="static",
+        )
 
         app_fastapi.mount(
-            "/trainer", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
+            "/trainer",
+            StaticFiles(directory=f"{getFrontendPath()}", html=True),
+            name="static",
+        )
 
         app_fastapi.mount(
-            "/recorder", StaticFiles(directory=f'{getFrontendPath()}', html=True), name="static")
+            "/recorder",
+            StaticFiles(directory=f"{getFrontendPath()}", html=True),
+            name="static",
+        )
         app_fastapi.mount(
-            "/tmp", StaticFiles(directory=f'{TMP_DIR}'), name="static")
+            "/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static"
+        )
 
         restHello = MMVC_Rest_Hello()
         app_fastapi.include_router(restHello.router)
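Note: the get_instance() change fixes a subtle bug in the singleton check.
Once _instance = None is declared on the class, hasattr(cls, "_instance") is
always True, so the old "if not hasattr(...)" guard could never distinguish
"not yet created" from "created"; comparing against None does. A minimal
standalone sketch of the corrected pattern (hypothetical class, for
illustration only):

    class Singleton:
        _instance = None  # declared up front, so hasattr() is always True

        @classmethod
        def get_instance(cls):
            if cls._instance is None:  # works; hasattr(cls, "_instance") would not
                cls._instance = cls()
            return cls._instance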

server/restapi/MMVC_Rest_Fileuploader.py

@@ -4,12 +4,13 @@ from typing import Union
 from fastapi import APIRouter
 from fastapi.encoders import jsonable_encoder
 from fastapi.responses import JSONResponse
-from fastapi import HTTPException, FastAPI, UploadFile, File, Form
+from fastapi import UploadFile, File, Form
 
 from restapi.mods.FileUploader import upload_file, concat_file_chunks
 from voice_changer.VoiceChangerManager import VoiceChangerManager
+
 from const import MODEL_DIR, UPLOAD_DIR, ModelType
 
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 os.makedirs(MODEL_DIR, exist_ok=True)
 
@@ -19,12 +20,22 @@ class MMVC_Rest_Fileuploader:
         self.voiceChangerManager = voiceChangerManager
         self.router = APIRouter()
         self.router.add_api_route("/info", self.get_info, methods=["GET"])
-        self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
-        self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
-        self.router.add_api_route("/update_settings", self.post_update_settings, methods=["POST"])
+        self.router.add_api_route(
+            "/upload_file", self.post_upload_file, methods=["POST"]
+        )
+        self.router.add_api_route(
+            "/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"]
+        )
+        self.router.add_api_route(
+            "/update_settings", self.post_update_settings, methods=["POST"]
+        )
         self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
-        self.router.add_api_route("/load_model_for_train", self.post_load_model_for_train, methods=["POST"])
-        self.router.add_api_route("/extract_voices", self.post_extract_voices, methods=["POST"])
+        self.router.add_api_route(
+            "/load_model_for_train", self.post_load_model_for_train, methods=["POST"]
+        )
+        self.router.add_api_route(
+            "/extract_voices", self.post_extract_voices, methods=["POST"]
+        )
         self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
         self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
         self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
@@ -34,9 +45,13 @@ class MMVC_Rest_Fileuploader:
         json_compatible_item_data = jsonable_encoder(res)
         return JSONResponse(content=json_compatible_item_data)
 
-    def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
+    def post_concat_uploaded_file(
+        self, filename: str = Form(...), filenameChunkNum: int = Form(...)
+    ):
         slot = 0
-        res = concat_file_chunks(slot, UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
+        res = concat_file_chunks(
+            slot, UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR
+        )
         json_compatible_item_data = jsonable_encoder(res)
         return JSONResponse(content=json_compatible_item_data)
 
@@ -45,7 +60,9 @@ class MMVC_Rest_Fileuploader:
         json_compatible_item_data = jsonable_encoder(info)
         return JSONResponse(content=json_compatible_item_data)
 
-    def post_update_settings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
+    def post_update_settings(
+        self, key: str = Form(...), val: Union[int, str, float] = Form(...)
+    ):
         print("post_update_settings", key, val)
         info = self.voiceChangerManager.update_settings(key, val)
         json_compatible_item_data = jsonable_encoder(info)
@@ -63,7 +80,6 @@ class MMVC_Rest_Fileuploader:
         isHalf: bool = Form(...),
         params: str = Form(...),
     ):
-
         props = {
             "slot": slot,
             "isHalf": isHalf,
@@ -73,9 +89,9 @@ class MMVC_Rest_Fileuploader:
                 "onnxModelFilename": onnxModelFilename,
                 "clusterTorchModelFilename": clusterTorchModelFilename,
                 "featureFilename": featureFilename,
-                "indexFilename": indexFilename
+                "indexFilename": indexFilename,
             },
-            "params": params
+            "params": params,
         }
         # Change Filepath
         for key, val in props["files"].items():
@@ -103,9 +119,11 @@ class MMVC_Rest_Fileuploader:
         modelDFilenameChunkNum: int = Form(...),
     ):
         modelGFilePath = concat_file_chunks(
-            UPLOAD_DIR, modelGFilename, modelGFilenameChunkNum, MODEL_DIR)
+            UPLOAD_DIR, modelGFilename, modelGFilenameChunkNum, MODEL_DIR
+        )
         modelDFilePath = concat_file_chunks(
-            UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR)
+            UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR
+        )
         return {"File saved": f"{modelGFilePath}, {modelDFilePath}"}
 
     def post_extract_voices(
@@ -114,7 +132,8 @@ class MMVC_Rest_Fileuploader:
         zipFileChunkNum: int = Form(...),
     ):
         zipFilePath = concat_file_chunks(
-            UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR)
+            UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR
+        )
         shutil.unpack_archive(zipFilePath, "MMVC_Trainer/dataset/textful/")
         return {"Zip file unpacked": f"{zipFilePath}"}
 
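Note: this class wires its endpoints with APIRouter.add_api_route(), the
imperative equivalent of FastAPI's decorator style. A minimal self-contained
sketch of the same registration pattern (names here are illustrative, not
from the repo):

    from fastapi import APIRouter, FastAPI

    router = APIRouter()


    def get_info():
        return {"status": "ok"}


    # Equivalent to decorating get_info with @router.get("/info")
    router.add_api_route("/info", get_info, methods=["GET"])

    app = FastAPI()
    app.include_router(router)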

server/restapi/MMVC_Rest_Hello.py

@@ -1,6 +1,6 @@
 from fastapi import APIRouter
-from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse
+
+
 class MMVC_Rest_Hello:
     def __init__(self):
         self.router = APIRouter()
@@ -8,6 +8,3 @@ class MMVC_Rest_Hello:
 
     def hello(self):
         return {"result": "Index"}
-
-
-
|
|||||||
buffer = voice.buffer
|
buffer = voice.buffer
|
||||||
wav = base64.b64decode(buffer)
|
wav = base64.b64decode(buffer)
|
||||||
|
|
||||||
if wav == 0:
|
# if wav == 0:
|
||||||
samplerate, data = read("dummy.wav")
|
# samplerate, data = read("dummy.wav")
|
||||||
unpackedData = data
|
# unpackedData = data
|
||||||
else:
|
# else:
|
||||||
unpackedData = np.array(struct.unpack(
|
# unpackedData = np.array(
|
||||||
'<%sh' % (len(wav) // struct.calcsize('<h')), wav))
|
# struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav)
|
||||||
# write("logs/received_data.wav", 24000,
|
# )
|
||||||
# unpackedData.astype(np.int16))
|
|
||||||
|
unpackedData = np.array(
|
||||||
|
struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav)
|
||||||
|
)
|
||||||
|
|
||||||
self.tlock.acquire()
|
self.tlock.acquire()
|
||||||
changedVoice = self.voiceChangerManager.changeVoice(unpackedData)
|
changedVoice = self.voiceChangerManager.changeVoice(unpackedData)
|
||||||
self.tlock.release()
|
self.tlock.release()
|
||||||
|
|
||||||
changedVoiceBase64 = base64.b64encode(changedVoice[0]).decode('utf-8')
|
changedVoiceBase64 = base64.b64encode(changedVoice[0]).decode("utf-8")
|
||||||
data = {
|
data = {"timestamp": timestamp, "changedVoiceBase64": changedVoiceBase64}
|
||||||
"timestamp": timestamp,
|
|
||||||
"changedVoiceBase64": changedVoiceBase64
|
|
||||||
}
|
|
||||||
|
|
||||||
json_compatible_item_data = jsonable_encoder(data)
|
json_compatible_item_data = jsonable_encoder(data)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
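Note: the struct.unpack call kept above decodes raw little-endian 16-bit PCM
bytes into integer samples. A tiny self-contained illustration of the same
expression on fabricated data:

    import struct

    import numpy as np

    wav = struct.pack("<4h", 0, 1000, -1000, 32767)  # four fake PCM samples
    unpackedData = np.array(
        struct.unpack("<%sh" % (len(wav) // struct.calcsize("<h")), wav)
    )
    print(unpackedData)  # [     0   1000  -1000  32767]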

server/voice_changer/IOAnalyzer.py (deleted)

@@ -1,40 +0,0 @@
-import os
-import numpy as np
-import pylab
-import librosa
-import librosa.display
-import pyworld as pw
-
-
-class IOAnalyzer:
-
-    def _get_f0_dio(self, y, sr):
-        _f0, time = pw.dio(y, sr, frame_period=5)
-        f0 = pw.stonemask(y, _f0, time, sr)
-        time = np.linspace(0, y.shape[0] / sr, len(time))
-        return f0, time
-
-    def _get_f0_harvest(self, y, sr):
-        _f0, time = pw.harvest(y, sr, frame_period=5)
-        f0 = pw.stonemask(y, _f0, time, sr)
-        time = np.linspace(0, y.shape[0] / sr, len(time))
-        return f0, time
-
-    def analyze(self, inputDataFile: str, dioImageFile: str, harvestImageFile: str, samplingRate: int):
-        y, sr = librosa.load(inputDataFile, samplingRate)
-        y = y.astype(np.float64)
-        spec = librosa.amplitude_to_db(np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=128)), ref=np.max)
-        f0_dio, times = self._get_f0_dio(y, sr=samplingRate)
-        f0_harvest, times = self._get_f0_harvest(y, sr=samplingRate)
-
-        pylab.close()
-        HOP_LENGTH = 128
-        img = librosa.display.specshow(spec, sr=samplingRate, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
-        pylab.plot(times, f0_dio, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
-        pylab.savefig(dioImageFile)
-
-        pylab.close()
-        HOP_LENGTH = 128
-        img = librosa.display.specshow(spec, sr=samplingRate, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
-        pylab.plot(times, f0_harvest, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
-        pylab.savefig(harvestImageFile)

server/voice_changer/RVC/ModelWrapper.py

@@ -30,13 +30,15 @@ class ModelWrapper:
             self.embChannels = metadata["embChannels"]
             self.modelType = metadata["modelType"]
             self.deprecated = False
-            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}")
+            self.embedder = metadata["embedder"] if "embedder" in metadata else "hubert_base"
+            print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}, embedder:{self.embedder}")
         except:
             self.samplingRate = 48000
             self.f0 = True
             self.embChannels = 256
             self.modelType = 0
             self.deprecated = True
+            self.embedder = "hubert_base"
             print(f"[Voice Changer] ############## !!!! CAUTION !!!! ####################")
             print(f"[Voice Changer] This onnx's version is depricated. Please regenerate onnxfile. Fallback to default")
             print(f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}")
@@ -57,6 +59,9 @@ class ModelWrapper:
     def getDeprecated(self):
         return self.deprecated
 
+    def getEmbedder(self):
+        return self.embedder
+
     def set_providers(self, providers, provider_options=[{}]):
         self.onnx_session.set_providers(providers=providers, provider_options=provider_options)
 
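Note: ModelWrapper's loader is not shown in this commit, but the metadata
dict it reads most likely comes from the ONNX file's custom metadata map. A
sketch under that assumption (the "metadata" key is assumed for illustration
and may differ in the repo; the fallback mirrors the code above):

    import json

    import onnxruntime

    sess = onnxruntime.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    meta = sess.get_modelmeta().custom_metadata_map  # dict of str -> str
    metadata = json.loads(meta["metadata"])  # assumed key, for illustration
    embedder = metadata["embedder"] if "embedder" in metadata else "hubert_base"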

server/voice_changer/RVC/RVC.py

@@ -4,11 +4,12 @@ import json
 import resampy
 from voice_changer.RVC.ModelWrapper import ModelWrapper
 from Exceptions import NoModeLoadedException
+from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 
 # avoiding parse arg error in RVC
 sys.argv = ["MMVCServerSIO.py"]
 
-if sys.platform.startswith('darwin'):
+if sys.platform.startswith("darwin"):
     baseDir = [x for x in sys.path if x.endswith("Contents/MacOS")]
     if len(baseDir) != 1:
         print("baseDir should be only one ", baseDir)
@@ -24,6 +25,7 @@ from functools import reduce
 import numpy as np
 import torch
 import onnxruntime
+
 # onnxruntime.set_default_logger_severity(3)
 from const import HUBERT_ONNX_MODEL_PATH, TMP_DIR
 
@@ -36,11 +38,17 @@ from .models import SynthesizerTrnMsNSFsidNono as SynthesizerTrnMsNSFsidNono_webui
 
 from .const import RVC_MODEL_TYPE_RVC, RVC_MODEL_TYPE_WEBUI
 from fairseq import checkpoint_utils
-providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
+
+providers = [
+    "OpenVINOExecutionProvider",
+    "CUDAExecutionProvider",
+    "DmlExecutionProvider",
+    "CPUExecutionProvider",
+]
 
 
 @dataclass
-class ModelSlot():
+class ModelSlot:
     pyTorchModelFile: str = ""
     onnxModelFile: str = ""
     featureFile: str = ""
@@ -51,13 +59,11 @@ class ModelSlot():
     f0: bool = True
     embChannels: int = 256
     deprecated: bool = False
-    # samplingRateOnnx: int = -1
-    # f0Onnx: bool = True
-    # embChannelsOnnx: int = 256
+    embedder: str = "hubert_base"  # "hubert_base", "contentvec", "distilhubert"
 
 
 @dataclass
-class RVCSettings():
+class RVCSettings:
     gpu: int = 0
     dstId: int = 0
 
@@ -72,9 +78,7 @@ class RVCSettings():
     onnxModelFile: str = ""
     configFile: str = ""
     modelSlots: list[ModelSlot] = field(
-        default_factory=lambda: [
-            ModelSlot(), ModelSlot(), ModelSlot()
-        ]
+        default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot()]
     )
     indexRatio: float = 0
     rvcQuality: int = 0
@@ -82,23 +86,28 @@ class RVCSettings():
     modelSamplingRate: int = 48000
     modelSlotIndex: int = -1
 
-    speakers: dict[str, int] = field(
-        default_factory=lambda: {}
-    )
+    speakers: dict[str, int] = field(default_factory=lambda: {})
 
     # list only the mutable items
-    intData = ["gpu", "dstId", "tran", "extraConvertSize", "rvcQuality", "modelSamplingRate", "silenceFront", "modelSlotIndex"]
+    intData = [
+        "gpu",
+        "dstId",
+        "tran",
+        "extraConvertSize",
+        "rvcQuality",
+        "modelSamplingRate",
+        "silenceFront",
+        "modelSlotIndex",
+    ]
     floatData = ["silentThreshold", "indexRatio"]
     strData = ["framework", "f0Detector"]
 
 
 class RVC:
-    def __init__(self, params):
+    def __init__(self, params: VoiceChangerParams):
         self.initialLoad = True
         self.settings = RVCSettings()
 
-        self.inferenceing: bool = False
-
         self.net_g = None
         self.onnx_session = None
         self.feature_file = None
@@ -108,7 +117,10 @@ class RVC:
         self.prevVol = 0
         self.params = params
 
-        self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
+        self.mps_enabled: bool = (
+            getattr(torch.backends, "mps", None) is not None
+            and torch.backends.mps.is_available()
+        )
         self.currentSlot = -1
         print("RVC initialization: ", params)
         print("mps: ", self.mps_enabled)
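Note: the mps_enabled expression above degrades gracefully: getattr() returns
None on torch builds that lack an MPS backend, short-circuiting before
is_available() is called. The same check as a standalone snippet:

    import torch

    # True only on torch builds that ship the Apple MPS backend AND on
    # hardware where it is actually usable; False everywhere else.
    mps_enabled: bool = (
        getattr(torch.backends, "mps", None) is not None
        and torch.backends.mps.is_available()
    )
    print("mps:", mps_enabled)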
@@ -120,26 +132,41 @@ class RVC:
         params = json.loads(params_str)
 
         newSlot = asdict(self.settings.modelSlots[tmp_slot])
-        newSlot.update({
-            "pyTorchModelFile": props["files"]["pyTorchModelFilename"],
-            "onnxModelFile": props["files"]["onnxModelFilename"],
-            "featureFile": props["files"]["featureFilename"],
-            "indexFile": props["files"]["indexFilename"],
-            "defaultTrans": params["trans"]
-        })
+        newSlot.update(
+            {
+                "pyTorchModelFile": props["files"]["pyTorchModelFilename"],
+                "onnxModelFile": props["files"]["onnxModelFilename"],
+                "featureFile": props["files"]["featureFilename"],
+                "indexFile": props["files"]["indexFilename"],
+                "defaultTrans": params["trans"],
+            }
+        )
         self.settings.modelSlots[tmp_slot] = ModelSlot(**newSlot)
 
         print("[Voice Changer] RVC loading... slot:", tmp_slot)
 
         # Load metadata
-        if self.settings.modelSlots[tmp_slot].pyTorchModelFile != None and self.settings.modelSlots[tmp_slot].pyTorchModelFile != "":
-            self._setInfoByPytorch(tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile)
-        if self.settings.modelSlots[tmp_slot].onnxModelFile != None and self.settings.modelSlots[tmp_slot].onnxModelFile != "":
-            self._setInfoByONNX(tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile)
+        if (
+            self.settings.modelSlots[tmp_slot].pyTorchModelFile != None
+            and self.settings.modelSlots[tmp_slot].pyTorchModelFile != ""
+        ):
+            self._setInfoByPytorch(
+                tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile
+            )
+        if (
+            self.settings.modelSlots[tmp_slot].onnxModelFile != None
+            and self.settings.modelSlots[tmp_slot].onnxModelFile != ""
+        ):
+            self._setInfoByONNX(
+                tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile
+            )
 
         try:
-            hubert_path = self.params["hubert_base"]
-            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
+            hubert_path = self.params.hubert_base
+            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
+                [hubert_path],
+                suffix="",
+            )
             model = models[0]
             model.eval()
             if self.is_half:
@@ -164,13 +191,21 @@ class RVC:
         if config_len == 18:
             self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_RVC
             self.settings.modelSlots[slot].embChannels = 256
+            self.settings.modelSlots[slot].embedder = "hubert_base"
         else:
             self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_WEBUI
             self.settings.modelSlots[slot].embChannels = cpt["config"][17]
+            self.settings.modelSlots[slot].embedder = cpt["embedder_name"]
+            if self.settings.modelSlots[slot].embedder.endswith("768"):
+                self.settings.modelSlots[slot].embedder = self.settings.modelSlots[
+                    slot
+                ].embedder[:-3]
+            print("embedder....", self.settings.modelSlots[slot].embedder)
+
         self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
         self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]
 
-        self.settings.modelSamplingRate = cpt["config"][-1]
+        # self.settings.modelSamplingRate = cpt["config"][-1]
 
     def _setInfoByONNX(self, slot, file):
         tmp_onnx_session = ModelWrapper(file)
@@ -179,6 +214,8 @@ class RVC:
         self.settings.modelSlots[slot].f0 = tmp_onnx_session.getF0()
         self.settings.modelSlots[slot].samplingRate = tmp_onnx_session.getSamplingRate()
         self.settings.modelSlots[slot].deprecated = tmp_onnx_session.getDeprecated()
+        self.settings.modelSlots[slot].embedder = tmp_onnx_session.getEmbedder()
+        print("embedder....", self.settings.modelSlots[slot].embedder)
 
     def prepareModel(self, slot: int):
         print("[Voice Changer] Prepare Model of slot:", slot)
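Note: _setInfoByPytorch reads the embedder name from rvc-webui checkpoints
and strips a trailing "768" so that a 768-dimensional variant maps back to
the base embedder name. The suffix handling in isolation (the sample value
is hypothetical):

    name = "hubert_base768"  # hypothetical cpt["embedder_name"] from a checkpoint
    if name.endswith("768"):
        name = name[:-3]
    assert name == "hubert_base"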
@@ -188,7 +225,7 @@ class RVC:
         if pyTorchModelFile != None and pyTorchModelFile != "":
             print("[Voice Changer] Loading Pytorch Model...")
             cpt = torch.load(pyTorchModelFile, map_location="cpu")
-            '''
+            """
             (1) オリジナルとrvc-webuiのモデル判定 ⇒ config全体の形状
             ■ ノーマル256
             [1025, 32, 192, 192, 768, 2, 6, 3, 0, '1', [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 6, 2, 2, 2], 512, [16, 16, 4, 4, 4], 109, 256, 48000]
@@ -200,32 +237,32 @@ class RVC:
             0: ピッチレス, 1:ノーマル
 
             (2-2) rvc-webuiの、(256 or 768) x (ノーマルor pitchレス)判定 ⇒ 256, or 768 は17番目の要素で判定。, ノーマルor pitchレスはckp["f0"]で判定
-            '''
-            # config_len = len(cpt["config"])
-            # if config_len == 18:
-            #     self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_RVC
-            #     self.settings.modelSlots[slot].embChannels = 256
-            # else:
-            #     self.settings.modelSlots[slot].modelType = RVC_MODEL_TYPE_WEBUI
-            #     self.settings.modelSlots[slot].embChannels = cpt["config"][17]
-            # self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
-            # self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]
-
-            # self.settings.modelSamplingRate = cpt["config"][-1]
-
-            if self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC and self.settings.modelSlots[slot].f0 == True:
+            """
+
+            if (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
+                and self.settings.modelSlots[slot].f0 == True
+            ):
                 net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
-            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC and self.settings.modelSlots[slot].f0 == False:
+            elif (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
+                and self.settings.modelSlots[slot].f0 == False
+            ):
                 net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
-            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI and self.settings.modelSlots[slot].f0 == True:
-                net_g = SynthesizerTrnMsNSFsid_webui(**cpt["params"], is_half=self.is_half)
-            elif self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI and self.settings.modelSlots[slot].f0 == False:
-                ######################
-                # TBD
-                ######################
-                print("webui non-f0 is not supported yet")
-                net_g = SynthesizerTrnMsNSFsidNono_webui(**cpt["params"], is_half=self.is_half)
+            elif (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
+                and self.settings.modelSlots[slot].f0 == True
+            ):
+                net_g = SynthesizerTrnMsNSFsid_webui(
+                    **cpt["params"], is_half=self.is_half
+                )
+            elif (
+                self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
+                and self.settings.modelSlots[slot].f0 == False
+            ):
+                net_g = SynthesizerTrnMsNSFsidNono_webui(
+                    **cpt["params"], is_half=self.is_half
+                )
             else:
                 print("unknwon")
 
@@ -259,11 +296,15 @@ class RVC:
         self.next_trans = self.settings.modelSlots[slot].defaultTrans
         self.next_samplingRate = self.settings.modelSlots[slot].samplingRate
         self.next_framework = "ONNX" if self.next_onnx_session != None else "PyTorch"
-        print("[Voice Changer] Prepare done.",)
+        print(
+            "[Voice Changer] Prepare done.",
+        )
         return self.get_info()
 
     def switchModel(self):
-        print("[Voice Changer] Switching model..",)
+        print(
+            "[Voice Changer] Switching model..",
+        )
         # del self.net_g
         # del self.onnx_session
         self.net_g = self.next_net_g
@@ -275,17 +316,23 @@ class RVC:
         self.settings.modelSamplingRate = self.next_samplingRate
         self.next_net_g = None
         self.next_onnx_session = None
-        print("[Voice Changer] Switching model..done",)
+        print(
+            "[Voice Changer] Switching model..done",
+        )
 
     def update_settings(self, key: str, val: any):
         if key == "onnxExecutionProvider" and self.onnx_session != None:
             if val == "CUDAExecutionProvider":
                 if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
                     self.settings.gpu = 0
-                provider_options = [{'device_id': self.settings.gpu}]
-                self.onnx_session.set_providers(providers=[val], provider_options=provider_options)
+                provider_options = [{"device_id": self.settings.gpu}]
+                self.onnx_session.set_providers(
+                    providers=[val], provider_options=provider_options
+                )
                 if hasattr(self, "hubert_onnx"):
-                    self.hubert_onnx.set_providers(providers=[val], provider_options=provider_options)
+                    self.hubert_onnx.set_providers(
+                        providers=[val], provider_options=provider_options
+                    )
             else:
                 self.onnx_session.set_providers(providers=[val])
                 if hasattr(self, "hubert_onnx"):
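Note: update_settings() re-targets an existing onnxruntime session at a
specific GPU by re-setting its execution providers. The core call in
isolation (a sketch; assumes a CUDA-enabled onnxruntime build and an
existing model.onnx):

    import onnxruntime

    sess = onnxruntime.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    # Move inference to GPU 0 without rebuilding the session.
    sess.set_providers(
        providers=["CUDAExecutionProvider"],
        provider_options=[{"device_id": 0}],
    )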
@@ -294,12 +341,20 @@ class RVC:
             print("Onnx is not enabled. Please load model.")
             return False
         elif key in self.settings.intData:
-            if key == "gpu" and val >= 0 and val < self.gpu_num and self.onnx_session != None:
+            if (
+                key == "gpu"
+                and val >= 0
+                and val < self.gpu_num
+                and self.onnx_session != None
+            ):
                 providers = self.onnx_session.get_providers()
                 print("Providers:", providers)
                 if "CUDAExecutionProvider" in providers:
-                    provider_options = [{'device_id': self.settings.gpu}]
-                    self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
+                    provider_options = [{"device_id": self.settings.gpu}]
+                    self.onnx_session.set_providers(
+                        providers=["CUDAExecutionProvider"],
+                        provider_options=provider_options,
+                    )
             if key == "modelSlotIndex":
                 # self.switchModel(int(val))
                 val = int(val) % 1000  # Quick hack for same slot is selected
@@ -318,7 +373,9 @@ class RVC:
     def get_info(self):
         data = asdict(self.settings)
 
-        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session != None else []
+        data["onnxExecutionProviders"] = (
+            self.onnx_session.get_providers() if self.onnx_session != None else []
+        )
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
             if data[f] != None and os.path.exists(data[f]):
@@ -331,22 +388,30 @@ class RVC:
     def get_processing_sampling_rate(self):
         return self.settings.modelSamplingRate
 
-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
+    def generate_input(
+        self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0
+    ):
         newData = newData.astype(np.float32) / 32768.0
 
         if hasattr(self, "audio_buffer"):
-            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
+            self.audio_buffer = np.concatenate(
+                [self.audio_buffer, newData], 0
+            )  # concatenate with the past data
         else:
             self.audio_buffer = newData
 
-        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
+        convertSize = (
+            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
+        )
 
         if convertSize % 128 != 0:  # pad, because the model's output hop size would otherwise truncate
             convertSize = convertSize + (128 - (convertSize % 128))
 
         self.audio_buffer = self.audio_buffer[-1 * convertSize :]  # extract only the conversion target
 
-        crop = self.audio_buffer[-1 * (inputSize + crossfadeSize):-1 * (crossfadeSize)]  # crop the output part to check volume (relation to SOLA not considered yet)
+        crop = self.audio_buffer[
+            -1 * (inputSize + crossfadeSize) : -1 * (crossfadeSize)
+        ]  # crop the output part to check volume (relation to SOLA not considered yet)
         rms = np.sqrt(np.square(crop).mean(axis=0))
         vol = max(rms, self.prevVol * 0.0)
         self.prevVol = vol
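Note: the "% 128" adjustment in generate_input() rounds the conversion
window up to the next multiple of the model's output hop size so nothing is
truncated. The arithmetic as a standalone helper (the function name is
illustrative):

    def pad_to_hop(convert_size: int, hop: int = 128) -> int:
        # Round up to the next multiple of the hop size.
        if convert_size % hop != 0:
            convert_size = convert_size + (hop - (convert_size % hop))
        return convert_size


    assert pad_to_hop(4096) == 4096  # already aligned
    assert pad_to_hop(4100) == 4224  # bumped to the next multiple of 128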
@@ -390,15 +455,34 @@ class RVC:
 
         f0 = self.settings.modelSlots[self.currentSlot].f0
         embChannels = self.settings.modelSlots[self.currentSlot].embChannels
-        audio_out = vc.pipeline(self.hubert_model, self.onnx_session, sid, audio, times, f0_up_key, f0_method,
-                                file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file, silence_front=self.settings.extraConvertSize / self.settings.modelSamplingRate, embChannels=embChannels)
+        audio_out = vc.pipeline(
+            self.hubert_model,
+            self.onnx_session,
+            sid,
+            audio,
+            times,
+            f0_up_key,
+            f0_method,
+            file_index,
+            file_big_npy,
+            index_rate,
+            if_f0,
+            f0_file=f0_file,
+            silence_front=self.settings.extraConvertSize
+            / self.settings.modelSamplingRate,
+            embChannels=embChannels,
+        )
         result = audio_out * np.sqrt(vol)
 
         return result
 
     def _pyTorch_inference(self, data):
         if hasattr(self, "net_g") == False or self.net_g == None:
-            print("[Voice Changer] No pyTorch session.", hasattr(self, "net_g"), self.net_g)
+            print(
+                "[Voice Changer] No pyTorch session.",
+                hasattr(self, "net_g"),
+                self.net_g,
+            )
             raise NoModeLoadedException("pytorch")
 
         if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled == False):
@@ -436,8 +520,23 @@ class RVC:
             f0_file = None
 
         embChannels = self.settings.modelSlots[self.currentSlot].embChannels
-        audio_out = vc.pipeline(self.hubert_model, self.net_g, sid, audio, times, f0_up_key, f0_method,
-                                file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file, silence_front=self.settings.extraConvertSize / self.settings.modelSamplingRate, embChannels=embChannels)
+        audio_out = vc.pipeline(
+            self.hubert_model,
+            self.net_g,
+            sid,
+            audio,
+            times,
+            f0_up_key,
+            f0_method,
+            file_index,
+            file_big_npy,
+            index_rate,
+            if_f0,
+            f0_file=f0_file,
+            silence_front=self.settings.extraConvertSize
+            / self.settings.modelSamplingRate,
+            embChannels=embChannels,
+        )
 
         result = audio_out * np.sqrt(vol)
 
@@ -445,7 +544,11 @@ class RVC:
 
     def inference(self, data):
         if self.settings.modelSlotIndex < 0:
-            print("[Voice Changer] wait for loading model...", self.settings.modelSlotIndex, self.currentSlot)
+            print(
+                "[Voice Changer] wait for loading model...",
+                self.settings.modelSlotIndex,
+                self.currentSlot,
+            )
             raise NoModeLoadedException("model_common")
 
         if self.currentSlot != self.settings.modelSlotIndex:
@ -482,7 +585,9 @@ class RVC:
|
|||||||
print("[Voice Changer] export2onnx, No pyTorch session.")
|
print("[Voice Changer] export2onnx, No pyTorch session.")
|
||||||
return {"status": "ng", "path": f""}
|
return {"status": "ng", "path": f""}
|
||||||
|
|
||||||
pyTorchModelFile = self.settings.modelSlots[self.settings.modelSlotIndex].pyTorchModelFile # inference前にexportできるようにcurrentSlotではなくslot
|
pyTorchModelFile = self.settings.modelSlots[
|
||||||
|
self.settings.modelSlotIndex
|
||||||
|
].pyTorchModelFile # inference前にexportできるようにcurrentSlotではなくslot
|
||||||
|
|
||||||
if pyTorchModelFile == None:
|
if pyTorchModelFile == None:
|
||||||
print("[Voice Changer] export2onnx, No pyTorch filepath.")
|
print("[Voice Changer] export2onnx, No pyTorch filepath.")
|
||||||
@@ -490,23 +595,45 @@ class RVC:
         import voice_changer.RVC.export2onnx as onnxExporter
 
         output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
-        output_file_simple = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
+        output_file_simple = (
+            os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
+        )
         output_path = os.path.join(TMP_DIR, output_file)
         output_path_simple = os.path.join(TMP_DIR, output_file_simple)
-        print("embChannels", self.settings.modelSlots[self.settings.modelSlotIndex].embChannels)
+        print(
+            "embChannels",
+            self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
+        )
         metadata = {
             "application": "VC_CLIENT",
             "version": "1",
-            "modelType": self.settings.modelSlots[self.settings.modelSlotIndex].modelType,
-            "samplingRate": self.settings.modelSlots[self.settings.modelSlotIndex].samplingRate,
+            "modelType": self.settings.modelSlots[
+                self.settings.modelSlotIndex
+            ].modelType,
+            "samplingRate": self.settings.modelSlots[
+                self.settings.modelSlotIndex
+            ].samplingRate,
             "f0": self.settings.modelSlots[self.settings.modelSlotIndex].f0,
-            "embChannels": self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
+            "embChannels": self.settings.modelSlots[
+                self.settings.modelSlotIndex
+            ].embChannels,
+            "embedder": self.settings.modelSlots[self.settings.modelSlotIndex].embedder,
         }
 
         if torch.cuda.device_count() > 0:
-            onnxExporter.export2onnx(pyTorchModelFile, output_path, output_path_simple, True, metadata)
+            onnxExporter.export2onnx(
+                pyTorchModelFile, output_path, output_path_simple, True, metadata
+            )
         else:
-            print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
-            onnxExporter.export2onnx(pyTorchModelFile, output_path, output_path_simple, False, metadata)
+            print(
+                "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
+            )
+            onnxExporter.export2onnx(
+                pyTorchModelFile, output_path, output_path_simple, False, metadata
+            )
 
-        return {"status": "ok", "path": f"/tmp/{output_file_simple}", "filename": output_file_simple}
+        return {
+            "status": "ok",
+            "path": f"/tmp/{output_file_simple}",
+            "filename": output_file_simple,
+        }
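The metadata dict above travels with the exported model, presumably as ONNX metadata_props (the exact storage depends on what export2onnx writes). A minimal sketch of reading it back with the onnx package; the file name is hypothetical:

import onnx

model = onnx.load("model_simple.onnx")  # hypothetical export2onnx output
metadata = {prop.key: prop.value for prop in model.metadata_props}
print(metadata.get("application"), metadata.get("embChannels"))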
@@ -9,14 +9,18 @@ import resampy
 
 from voice_changer.IORecorder import IORecorder
-# from voice_changer.IOAnalyzer import IOAnalyzer
 
 from voice_changer.utils.Timer import Timer
 from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
-import time
 from Exceptions import NoModeLoadedException, ONNXInputArgumentException
+from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 
-providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
+providers = [
+    "OpenVINOExecutionProvider",
+    "CUDAExecutionProvider",
+    "DmlExecutionProvider",
+    "CPUExecutionProvider",
+]
 
 STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
 STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
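The providers list is ordered by preference; onnxruntime only accepts providers present in the installed build, so a machine without CUDA or DirectML still falls through to CPU. A minimal sketch (assuming onnxruntime is importable; the model path is hypothetical):

import onnxruntime as ort

preferred = [
    "OpenVINOExecutionProvider",
    "CUDAExecutionProvider",
    "DmlExecutionProvider",
    "CPUExecutionProvider",
]
available = ort.get_available_providers()
chosen = [p for p in preferred if p in available]  # keep preference order
session = ort.InferenceSession("model.onnx", providers=chosen)  # hypothetical path
print("using:", session.get_providers())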
@@ -25,7 +29,7 @@ STREAM_ANALYZE_FILE_HARVEST = os.path.join(TMP_DIR, "analyze-harvest.png")
 
 
 @dataclass
-class VoiceChangerSettings():
+class VoiceChangerSettings:
     inputSampleRate: int = 48000  # 48000 or 24000
 
     crossFadeOffsetRate: float = 0.1
@@ -41,16 +45,14 @@ class VoiceChangerSettings():
     floatData: list[str] = field(
         default_factory=lambda: ["crossFadeOffsetRate", "crossFadeEndRate"]
     )
-    strData: list[str] = field(
-        default_factory=lambda: []
-    )
+    strData: list[str] = field(default_factory=lambda: [])
 
 
-class VoiceChanger():
+class VoiceChanger:
     settings: VoiceChangerSettings
     voiceChanger: VoiceChangerModel
 
-    def __init__(self, params):
+    def __init__(self, params: VoiceChangerParams):
         # initialization
         self.settings = VoiceChangerSettings()
         self.onnx_session = None
@@ -64,9 +66,14 @@ class VoiceChanger():
         self.params = params
         self.gpu_num = torch.cuda.device_count()
         self.prev_audio = np.zeros(4096)
-        self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
+        self.mps_enabled: bool = (
+            getattr(torch.backends, "mps", None) is not None
+            and torch.backends.mps.is_available()
+        )
 
-        print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
+        print(
+            f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
+        )
 
     def switchModelType(self, modelType: ModelType):
         if hasattr(self, "voiceChanger") and self.voiceChanger != None:
@@ -77,24 +84,31 @@ class VoiceChanger():
         self.modelType = modelType
         if self.modelType == "MMVCv15":
             from voice_changer.MMVCv15.MMVCv15 import MMVCv15
+
             self.voiceChanger = MMVCv15()  # type: ignore
         elif self.modelType == "MMVCv13":
             from voice_changer.MMVCv13.MMVCv13 import MMVCv13
+
             self.voiceChanger = MMVCv13()
         elif self.modelType == "so-vits-svc-40v2":
             from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
+
             self.voiceChanger = SoVitsSvc40v2(self.params)
         elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c":
             from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
+
             self.voiceChanger = SoVitsSvc40(self.params)
         elif self.modelType == "DDSP-SVC":
             from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
+
             self.voiceChanger = DDSP_SVC(self.params)
         elif self.modelType == "RVC":
             from voice_changer.RVC.RVC import RVC
+
             self.voiceChanger = RVC(self.params)
         else:
             from voice_changer.MMVCv13.MMVCv13 import MMVCv13
+
             self.voiceChanger = MMVCv13()
 
         return {"status": "OK", "msg": "vc is switched."}
@@ -109,7 +123,6 @@ class VoiceChanger():
         self,
         props,
     ):
-
         try:
             return self.voiceChanger.loadModel(props)
         except Exception as e:
@@ -143,7 +156,9 @@ class VoiceChanger():
         if key == "recordIO" and val == 1:
             if hasattr(self, "ioRecorder"):
                 self.ioRecorder.close()
-            self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
+            self.ioRecorder = IORecorder(
+                STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
+            )
         if key == "recordIO" and val == 0:
             if hasattr(self, "ioRecorder"):
                 self.ioRecorder.close()
@@ -174,12 +189,12 @@ class VoiceChanger():
         return self.get_info()
 
     def _generate_strength(self, crossfadeSize: int):
-        if self.crossfadeSize != crossfadeSize or \
-                self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
-                self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
-                self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
+        if (
+            self.crossfadeSize != crossfadeSize
+            or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
+            or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
+            or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
+        ):
             self.crossfadeSize = crossfadeSize
             self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
             self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
@@ -193,30 +208,54 @@ class VoiceChanger():
             np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
             np_cur_strength = np.cos((1 - percent) * 0.5 * np.pi) ** 2
 
-            self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength,
-                                                    np.zeros(crossfadeSize - cf_offset - len(np_prev_strength))])
-            self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(crossfadeSize - cf_offset - len(np_cur_strength))])
+            self.np_prev_strength = np.concatenate(
+                [
+                    np.ones(cf_offset),
+                    np_prev_strength,
+                    np.zeros(crossfadeSize - cf_offset - len(np_prev_strength)),
+                ]
+            )
+            self.np_cur_strength = np.concatenate(
+                [
+                    np.zeros(cf_offset),
+                    np_cur_strength,
+                    np.ones(crossfadeSize - cf_offset - len(np_cur_strength)),
+                ]
+            )
 
-            print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")
+            print(
+                f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
+            )
 
             # The size differs from the previous result, so clear the recorded buffers.
-            if hasattr(self, 'np_prev_audio1') == True:
+            if hasattr(self, "np_prev_audio1") == True:
                 delattr(self, "np_prev_audio1")
             if hasattr(self, "sola_buffer"):
                 del self.sola_buffer
 
     # receivedData: tuple of short
-    def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
+    def on_request(
+        self, receivedData: AudioInOut
+    ) -> tuple[AudioInOut, list[Union[int, float]]]:
         return self.on_request_sola(receivedData)
 
-    def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
+    def on_request_sola(
+        self, receivedData: AudioInOut
+    ) -> tuple[AudioInOut, list[Union[int, float]]]:
         try:
             processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
 
             # pre-processing
             with Timer("pre-process") as t:
                 if self.settings.inputSampleRate != processing_sampling_rate:
-                    newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
+                    newData = cast(
+                        AudioInOut,
+                        resampy.resample(
+                            receivedData,
+                            self.settings.inputSampleRate,
+                            processing_sampling_rate,
+                        ),
+                    )
                 else:
                     newData = receivedData
 
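The two cos² ramps built in _generate_strength are complementary: cos((1 - p) · π/2)² equals sin(p · π/2)², so the fade-out and fade-in weights sum to exactly 1 at every sample and the overlap-add keeps constant gain. A standalone sketch with a hypothetical crossfade size:

import numpy as np

crossfade_size = 1024                                     # hypothetical
percent = np.arange(crossfade_size) / crossfade_size
prev_strength = np.cos(percent * 0.5 * np.pi) ** 2        # fades 1 -> 0
cur_strength = np.cos((1 - percent) * 0.5 * np.pi) ** 2   # fades 0 -> 1
assert np.allclose(prev_strength + cur_strength, 1.0)     # constant-gain crossfade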
@@ -226,7 +265,9 @@ class VoiceChanger():
                 crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
                 self._generate_strength(crossfade_frame)
 
-                data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
+                data = self.voiceChanger.generate_input(
+                    newData, block_frame, crossfade_frame, sola_search_frame
+                )
             preprocess_time = t.secs
 
             # conversion
@@ -234,15 +275,28 @@ class VoiceChanger():
                 # Inference
                 audio = self.voiceChanger.inference(data)
 
-                if hasattr(self, 'sola_buffer') == True:
+                if hasattr(self, "sola_buffer") == True:
                     np.set_printoptions(threshold=10000)
                     audio = audio[-sola_search_frame - crossfade_frame - block_frame :]
                     # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
-                    cor_nom = np.convolve(audio[: crossfade_frame + sola_search_frame], np.flip(self.sola_buffer), 'valid')
-                    cor_den = np.sqrt(np.convolve(audio[: crossfade_frame + sola_search_frame] ** 2, np.ones(crossfade_frame), 'valid') + 1e-3)
+                    cor_nom = np.convolve(
+                        audio[: crossfade_frame + sola_search_frame],
+                        np.flip(self.sola_buffer),
+                        "valid",
+                    )
+                    cor_den = np.sqrt(
+                        np.convolve(
+                            audio[: crossfade_frame + sola_search_frame] ** 2,
+                            np.ones(crossfade_frame),
+                            "valid",
+                        )
+                        + 1e-3
+                    )
                     sola_offset = np.argmax(cor_nom / cor_den)
 
-                    output_wav = audio[sola_offset: sola_offset + block_frame].astype(np.float64)
+                    output_wav = audio[sola_offset : sola_offset + block_frame].astype(
+                        np.float64
+                    )
                     output_wav[:crossfade_frame] *= self.np_cur_strength
                     output_wav[:crossfade_frame] += self.sola_buffer[:]
 
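The cor_nom/cor_den pair is a normalized cross-correlation search: convolving the head of the new chunk with the time-flipped previous tail scores every candidate alignment, and the energy term in the denominator keeps loud regions from dominating the argmax. A toy, self-contained sketch with hypothetical frame sizes:

import numpy as np

crossfade_frame, sola_search_frame = 256, 128               # hypothetical sizes
rng = np.random.default_rng(0)
sola_buffer = rng.standard_normal(crossfade_frame)          # weighted previous tail
audio = rng.standard_normal(crossfade_frame + sola_search_frame + 4096)

head = audio[: crossfade_frame + sola_search_frame]
cor_nom = np.convolve(head, np.flip(sola_buffer), "valid")  # correlation per offset
cor_den = np.sqrt(np.convolve(head**2, np.ones(crossfade_frame), "valid") + 1e-3)
sola_offset = int(np.argmax(cor_nom / cor_den))             # in [0, sola_search_frame]
print("best alignment offset:", sola_offset)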
@@ -251,8 +305,16 @@ class VoiceChanger():
                     print("[Voice Changer] no sola buffer. (You can ignore this.)")
                     result = np.zeros(4096).astype(np.int16)
 
-                if hasattr(self, 'sola_buffer') == True and sola_offset < sola_search_frame:
-                    sola_buf_org = audio[- sola_search_frame - crossfade_frame + sola_offset: -sola_search_frame + sola_offset]
+                if (
+                    hasattr(self, "sola_buffer") == True
+                    and sola_offset < sola_search_frame
+                ):
+                    sola_buf_org = audio[
+                        -sola_search_frame
+                        - crossfade_frame
+                        + sola_offset : -sola_search_frame
+                        + sola_offset
+                    ]
                     self.sola_buffer = sola_buf_org * self.np_prev_strength
                 else:
                     self.sola_buffer = audio[-crossfade_frame:] * self.np_prev_strength
@@ -263,12 +325,20 @@ class VoiceChanger():
             with Timer("post-process") as t:
                 result = result.astype(np.int16)
                 if self.settings.inputSampleRate != processing_sampling_rate:
-                    outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
+                    outputData = cast(
+                        AudioInOut,
+                        resampy.resample(
+                            result,
+                            processing_sampling_rate,
+                            self.settings.inputSampleRate,
+                        ).astype(np.int16),
+                    )
                 else:
                     outputData = result
 
-                print_convert_processing(
-                    f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
+                print_convert_processing(
+                    f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
+                )
 
                 if self.settings.recordIO == 1:
                     self.ioRecorder.writeInput(receivedData)
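Both directions of conversion use the same call shape, resampy.resample(signal, source_rate, target_rate). A minimal round-trip sketch (assuming resampy is installed; the rates are hypothetical):

import numpy as np
import resampy

input_sr, model_sr = 48000, 40000               # hypothetical rates
chunk = np.zeros(4800, dtype=np.float64)        # 100 ms of silence at 48 kHz
to_model = resampy.resample(chunk, input_sr, model_sr)
back = resampy.resample(to_model, model_sr, input_sr).astype(np.int16)
print(to_model.shape[0], back.shape[0])         # 4000 and 4800 samples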
@@ -281,7 +351,9 @@ class VoiceChanger():
                 # #    f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
             postprocess_time = t.secs
 
-            print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
+            print_convert_processing(
+                f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
+            )
             perf = [preprocess_time, mainprocess_time, postprocess_time]
             return outputData, perf
 
@@ -299,8 +371,9 @@ class VoiceChanger():
     def export2onnx(self):
        return self.voiceChanger.export2onnx()
 
+
 ##############
 
 PRINT_CONVERT_PROCESSING: bool = False
 # PRINT_CONVERT_PROCESSING = True
 
@@ -318,5 +391,7 @@ def pad_array(arr: AudioInOut, target_length: int):
         pad_width = target_length - current_length
         pad_left = pad_width // 2
         pad_right = pad_width - pad_left
-        padded_arr = np.pad(arr, (pad_left, pad_right), 'constant', constant_values=(0, 0))
+        padded_arr = np.pad(
+            arr, (pad_left, pad_right), "constant", constant_values=(0, 0)
+        )
         return padded_arr
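pad_array centers a short chunk in a zero-filled buffer of the target length, with the odd sample of padding going to the right. A quick usage sketch:

import numpy as np

arr = np.array([1, 2, 3, 4, 5], dtype=np.int16)
# target_length=8 -> pad_width=3, pad_left=1, pad_right=2
padded = np.pad(arr, (1, 2), "constant", constant_values=(0, 0))
print(padded)  # [0 1 2 3 4 5 0 0]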
@@ -1,12 +1,16 @@
 import numpy as np
 from voice_changer.VoiceChanger import VoiceChanger
 from const import ModelType
+from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 
 
-class VoiceChangerManager():
+class VoiceChangerManager(object):
+    _instance = None
+    voiceChanger: VoiceChanger = None
+
     @classmethod
-    def get_instance(cls, params):
-        if not hasattr(cls, "_instance"):
+    def get_instance(cls, params: VoiceChangerParams):
+        if cls._instance is None:
             cls._instance = cls()
             cls._instance.voiceChanger = VoiceChanger(params)
         return cls._instance
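get_instance is a plain singleton: now that _instance is a real class attribute, the `is None` check replaces the old hasattr probe (which was always true once the class existed). A usage sketch, where `params` stands in for a populated VoiceChangerParams:

manager_a = VoiceChangerManager.get_instance(params)
manager_b = VoiceChangerManager.get_instance(params)
assert manager_a is manager_b  # the second call returns the same object, params ignored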
@@ -20,7 +24,7 @@ class VoiceChangerManager():
         return info
 
     def get_info(self):
-        if hasattr(self, 'voiceChanger'):
+        if hasattr(self, "voiceChanger"):
             info = self.voiceChanger.get_info()
             info["status"] = "OK"
             return info
@@ -28,7 +32,7 @@ class VoiceChangerManager():
         return {"status": "ERROR", "msg": "no model loaded"}
 
     def update_settings(self, key: str, val: any):
-        if hasattr(self, 'voiceChanger'):
+        if hasattr(self, "voiceChanger"):
             info = self.voiceChanger.update_settings(key, val)
             info["status"] = "OK"
             return info
@@ -36,7 +40,7 @@ class VoiceChangerManager():
         return {"status": "ERROR", "msg": "no model loaded"}
 
     def changeVoice(self, receivedData: any):
-        if hasattr(self, 'voiceChanger') == True:
+        if hasattr(self, "voiceChanger") is True:
             return self.voiceChanger.on_request(receivedData)
         else:
             print("Voice Change is not loaded. Did you load a correct model?")
11
server/voice_changer/utils/VoiceChangerParams.py
Normal file
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class VoiceChangerParams():
+    content_vec_500: str
+    content_vec_500_onnx: str
+    content_vec_500_onnx_on: bool
+    hubert_base: str
+    hubert_soft: str
+    nsf_hifigan: str
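The new dataclass gathers the model paths that were previously threaded through as loose arguments. A construction sketch with hypothetical paths (each field corresponds to one of the server's CLI flags):

params = VoiceChangerParams(
    content_vec_500="pretrain/checkpoint_best_legacy_500.pt",  # hypothetical paths
    content_vec_500_onnx="pretrain/content_vec_500.onnx",
    content_vec_500_onnx_on=False,
    hubert_base="pretrain/hubert_base.pt",
    hubert_soft="pretrain/hubert-soft.pt",
    nsf_hifigan="pretrain/nsf_hifigan/model",
)
voice_changer_manager = VoiceChangerManager.get_instance(params)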