# voice-changer/server/voice_changer/RVC/RVCModelSlotGenerator.py
# Viewer residue: 183 lines | 7.9 KiB | Python | Raw / Normal / View History

2023-06-21 03:18:51 +03:00
import os
from const import EnumInferenceTypes
from dataclasses import asdict
2023-06-21 03:18:51 +03:00
import torch
import onnxruntime
import json
2023-08-05 06:33:31 +03:00
from data.ModelSlot import RVCModelSlot
from voice_changer.VoiceChangerParamsManager import VoiceChangerParamsManager
2023-06-21 03:18:51 +03:00
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
class RVCModelSlotGenerator(ModelSlotGenerator):
    """Build an ``RVCModelSlot`` from the files uploaded for an RVC model.

    ``loadModel`` is the entry point. It records the uploaded file names on a
    fresh slot and then fills in the model-specific fields by inspecting
    either the PyTorch checkpoint or the metadata embedded in the ONNX file.
    """

    @classmethod
    def loadModel(cls, props: LoadModelParams):
        """Create the slot description for the model files listed in *props*.

        Args:
            props: Load request. ``props.files`` is iterated and each entry's
                ``kind`` / ``name`` is read; ``props.slot`` selects the
                sub-directory of the model directory that holds the file.

        Returns:
            The ``RVCModelSlot`` produced by ``_setInfoByONNX`` when the model
            file name ends with ``.onnx``, otherwise by ``_setInfoByPytorch``.
        """
        vcparams = VoiceChangerParamsManager.get_instance().params
        slotInfo: RVCModelSlot = RVCModelSlot()
        for file in props.files:
            if file.kind == "rvcModel":
                slotInfo.modelFile = file.name
            elif file.kind == "rvcIndex":
                slotInfo.indexFile = file.name

        # Defaults for the user-tunable settings of a freshly loaded model.
        slotInfo.defaultTune = 0
        slotInfo.defaultIndexRatio = 0
        slotInfo.defaultProtect = 0.5
        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
        # The slot is named after the model file, without its extension.
        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
        print("RVC:: slotInfo.modelFile", slotInfo.modelFile)

        # The model is looked up at <model_dir>/<slot>/<model file basename>.
        modelPath = os.path.join(vcparams.model_dir, str(props.slot), os.path.basename(slotInfo.modelFile))
        if slotInfo.isONNX:
            return cls._setInfoByONNX(modelPath, slotInfo)
        return cls._setInfoByPytorch(modelPath, slotInfo)

    @staticmethod
    def _stripEmbedderSuffix(embedderName: str) -> str:
        """Return *embedderName* without a trailing ``"768"``, if present."""
        if embedderName.endswith("768"):
            return embedderName[:-3]
        return embedderName

    @classmethod
    def _setInfoByPytorch(cls, modelPath: str, slot: RVCModelSlot):
        """Fill a copy of *slot* from the PyTorch checkpoint at *modelPath*.

        Three checkpoint flavours are told apart: ``version == "voras_beta"``,
        the original RVC layout (an 18-entry ``config``), and everything else,
        which is treated as a DDPN (RVC-WebUI) checkpoint.

        Args:
            modelPath: Path of the checkpoint file.
            slot: Slot whose current values seed the result; it is copied,
                not modified.

        Returns:
            A new ``RVCModelSlot`` with f0, model type, embedder settings,
            speakers and sampling rate taken from the checkpoint.

        Raises:
            KeyError: If a key this flavour requires (``config``, ``f0``,
                ``embedder_name``) is missing from the checkpoint.
        """
        # NOTE(review): torch.load unpickles the file, and the file is a
        # user-supplied upload. Unpickling untrusted data can execute
        # arbitrary code; consider a restricted loader if uploads are not
        # trusted.
        cpt = torch.load(modelPath, map_location="cpu")
        config_len = len(cpt["config"])
        version = cpt.get("version", "v1")

        # Work on a copy so the caller's slot object is left untouched.
        slot = RVCModelSlot(**asdict(slot))
        # Every flavour stores the pitch (f0) flag the same way.
        slot.f0 = bool(cpt["f0"] == 1)

        if version == "voras_beta":
            slot.modelType = EnumInferenceTypes.pyTorchVoRASbeta.value
            slot.embChannels = 768
            slot.embOutputLayer = cpt.get("embedder_output_layer", 9)
            slot.useFinalProj = False
            # The previous code raised "unknown embedder" for every name that
            # did not end in "768": the `else` of a since-removed name
            # validation chain had become attached to the suffix check. The
            # name is not validated here, matching the DDPN branch below.
            slot.embedder = cls._stripEmbedderSuffix(cpt["embedder_name"])

        elif config_len == 18:
            # Original RVC
            if version is None or version == "v1":
                slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
                slot.embChannels = 256
                slot.embOutputLayer = 9
                slot.useFinalProj = True
                slot.embedder = "hubert_base"
                print("[Voice Changer] Official Model(pyTorch) : v1")
            else:
                slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
                slot.embChannels = 768
                slot.embOutputLayer = 12
                slot.useFinalProj = False
                slot.embedder = "hubert_base"
                print("[Voice Changer] Official Model(pyTorch) : v2")

        else:
            # DDPN RVC
            slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
            slot.embChannels = cpt["config"][17]
            slot.embOutputLayer = cpt.get("embedder_output_layer", 9)
            # The final projection is used only with 256 embedding channels.
            slot.useFinalProj = slot.embChannels == 256

            # Report which official layout the DDPN model resembles.
            if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
                print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
            elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
                print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
            else:
                print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")

            slot.embedder = cls._stripEmbedderSuffix(cpt["embedder_name"])

        # Optional speaker table: keys become int ids, values become names.
        if "speaker_info" in cpt:
            for k, v in cpt["speaker_info"].items():
                slot.speakers[int(k)] = str(v)

        # The sampling rate is the last entry of the checkpoint's config.
        slot.samplingRate = cpt["config"][-1]

        del cpt
        return slot

    @classmethod
    def _setInfoByONNX(cls, modelPath: str, slot: RVCModelSlot):
        """Fill a copy of *slot* from the metadata stored in an ONNX model.

        The settings are read from the JSON document stored under the
        ``"metadata"`` key of the model's custom metadata map. If anything
        goes wrong while reading it (missing key, invalid JSON, ...), the
        failure is printed, the slot is marked ``deprecated`` and fixed
        fallback values are applied instead of raising.

        Args:
            modelPath: Path of the ONNX file.
            slot: Slot whose current values seed the result.

        Returns:
            The populated ``RVCModelSlot``.
        """
        tmp_onnx_session = onnxruntime.InferenceSession(modelPath, providers=["CPUExecutionProvider"])
        modelmeta = tmp_onnx_session.get_modelmeta()
        try:
            slot = RVCModelSlot(**asdict(slot))
            metadata = json.loads(modelmeta.custom_metadata_map["metadata"])

            slot.embChannels = metadata["embChannels"]
            slot.embOutputLayer = metadata.get("embOutputLayer", 9)
            # useFinalProj is derived from the channel count alone. The
            # previous code first read metadata["useFinalProj"] and then
            # unconditionally overwrote it with this value, so that read was
            # dead and has been dropped.
            slot.useFinalProj = slot.embChannels == 256

            # Report which official layout the ONNX model resembles.
            if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
                print("[Voice Changer] ONNX Model: Official v1 like")
            elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
                print("[Voice Changer] ONNX Model: Official v2 like")
            else:
                print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")

            slot.embedder = metadata.get("embedder", "hubert_base")
            slot.f0 = metadata["f0"]
            slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
            slot.samplingRate = metadata["samplingRate"]
            slot.deprecated = False

            # A missing "version" key raises KeyError here and therefore
            # takes the deprecated-file fallback below.
            onnxVersion = metadata["version"]
            if slot.embChannels == 256:
                # 2.1 exports clip inside the ONNX graph; realtime is disabled.
                slot.version = "v1.1" if onnxVersion == "2.1" else "v1"
            elif onnxVersion in ("2", "2.1", "2.2"):
                # 2.1 clips inside the ONNX graph (realtime disabled);
                # 2.2 is handled the same way as 2.1.
                slot.version = "v" + onnxVersion
            # Any other version leaves slot.version at its current value.

        except Exception as e:
            # Best-effort fallback for ONNX files without usable metadata.
            slot.modelType = EnumInferenceTypes.onnxRVC.value
            slot.embChannels = 256
            slot.embedder = "hubert_base"
            slot.f0 = True
            slot.samplingRate = 48000
            slot.deprecated = True

            print("[Voice Changer] setInfoByONNX", e)
            print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
            print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
            print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")

        del tmp_onnx_session
        return slot