Mirror of https://github.com/w-okada/voice-changer.git

WIP onnx improve

commit 80a5ba91b8
parent 93480636a3
@@ -40,6 +40,8 @@ class RVCModelSlot(ModelSlot):
     sampleId: str = ""
     speakers: dict = field(default_factory=lambda: {0: "target"})

+    version: str = "v2"
+

 @dataclass
 class MMVCv13ModelSlot(ModelSlot):
@@ -154,6 +154,16 @@ class RVCModelSlotGenerator(ModelSlotGenerator):
             slot.samplingRate = metadata["samplingRate"]
             slot.deprecated = False

+            if slot.embChannels == 256:
+                if metadata["version"] == "2.1":
+                    slot.version = "v1.1"  # v1.1 performs the clip inside the onnx graph; realtime is disabled
+                else:
+                    slot.version = "v1"
+            elif metadata["version"] == "2":
+                slot.version = "v2"
+            elif metadata["version"] == "2.1":  # 2.1 performs the clip inside the onnx graph; realtime is disabled
+                slot.version = "v2.1"
+
         except Exception as e:
             slot.modelType = EnumInferenceTypes.onnxRVC.value
             slot.embChannels = 256
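Taken together, this hunk derives the slot's version tag from the ONNX metadata and the embedder width: 256-channel embedders are v1-family, wider ones are v2-family, and metadata "2.1" marks graphs that clip internally. A minimal sketch of the same dispatch as a standalone function (the helper name and the fallback are illustrative, not part of the repo):

def resolve_slot_version(emb_channels: int, metadata_version: str) -> str:
    # Hypothetical helper mirroring the dispatch added above.
    if emb_channels == 256:
        # "2.1" metadata on a 256-channel model means the v1.1 export
        return "v1.1" if metadata_version == "2.1" else "v1"
    if metadata_version == "2":
        return "v2"
    if metadata_version == "2.1":
        return "v2.1"
    return "v2"  # assumption: fall back to the dataclass default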
@@ -20,8 +20,9 @@ class InferencerManager:
         inferencerType: EnumInferenceTypes,
         file: str,
         gpu: int,
+        inferencerTypeVersion: str | None = None,
     ) -> Inferencer:
-        cls.currentInferencer = cls.loadInferencer(inferencerType, file, gpu)
+        cls.currentInferencer = cls.loadInferencer(inferencerType, file, gpu, inferencerTypeVersion)
         return cls.currentInferencer

     @classmethod
@@ -30,6 +31,7 @@ class InferencerManager:
         inferencerType: EnumInferenceTypes,
         file: str,
         gpu: int,
+        inferencerTypeVersion: str | None = None,
     ) -> Inferencer:
         if inferencerType == EnumInferenceTypes.pyTorchRVC or inferencerType == EnumInferenceTypes.pyTorchRVC.value:
             return RVCInferencer().loadModel(file, gpu)
@@ -50,8 +52,8 @@ class InferencerManager:
         elif inferencerType == EnumInferenceTypes.pyTorchWebUINono or inferencerType == EnumInferenceTypes.pyTorchWebUINono.value:
             return WebUIInferencerNono().loadModel(file, gpu)
         elif inferencerType == EnumInferenceTypes.onnxRVC or inferencerType == EnumInferenceTypes.onnxRVC.value:
-            return OnnxRVCInferencer().loadModel(file, gpu)
+            return OnnxRVCInferencer().loadModel(file, gpu, inferencerTypeVersion)
         elif inferencerType == EnumInferenceTypes.onnxRVCNono or inferencerType == EnumInferenceTypes.onnxRVCNono.value:
-            return OnnxRVCInferencerNono().loadModel(file, gpu)
+            return OnnxRVCInferencerNono().loadModel(file, gpu, inferencerTypeVersion)
         else:
             raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)
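With these changes the version tag threads from the model slot through the manager into the concrete ONNX inferencers; the final hunk below does exactly this with modelSlot.version. A hedged usage sketch (import paths assumed from the repo layout, path and slot values made up):

from const import EnumInferenceTypes  # assumed location of the enum
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager

inferencer = InferencerManager.getInferencer(
    EnumInferenceTypes.onnxRVC,  # model type stored on the slot
    "model_dir/0/model.onnx",    # hypothetical model path
    0,                           # gpu index
    "v2.1",                      # forwarded to OnnxRVCInferencer.loadModel
)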
@@ -7,7 +7,7 @@ import numpy as np


 class OnnxRVCInferencer(Inferencer):
-    def loadModel(self, file: str, gpu: int):
+    def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
         self.setProps(EnumInferenceTypes.onnxRVC, file, True, gpu)
         (
             onnxProviders,
@@ -26,6 +26,9 @@ class OnnxRVCInferencer(Inferencer):
         self.isHalf = True

         self.model = onnx_session
+
+        self.inferencerTypeVersion = inferencerTypeVersion
+
         return self

     def infer(
@@ -66,7 +69,14 @@ class OnnxRVCInferencer(Inferencer):
             },
         )

-        return torch.tensor(np.array(audio1))
+        if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
+            res = audio1[0]
+        else:
+            res = np.array(audio1)[0][0, 0]
+        res = np.clip(res, -1.0, 1.0)
+        return torch.tensor(res)
+
+        # return torch.tensor(np.array(audio1))

     def getInferencerInfo(self):
         inferencer = super().getInferencerInfo()
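The new branch reflects two output layouts: v1.1/v2.1 graphs return an already-squeezed, pre-clipped waveform as their first output, while older graphs return a nested [1, 1, T] array that still needs squeezing. A toy shape check with dummy data (not the real session output):

import numpy as np

audio_old = [np.zeros((1, 1, 480), dtype=np.float32)]  # pre-v1.1/v2.1 layout
res_old = np.array(audio_old)[0][0, 0]                 # -> shape (480,)

audio_new = [np.zeros(480, dtype=np.float32)]          # v1.1/v2.1 layout
res_new = audio_new[0]                                 # -> shape (480,)

# Both paths end clamped to [-1, 1] before being wrapped in a tensor.
assert np.clip(res_old, -1.0, 1.0).shape == res_new.shape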
@@ -6,8 +6,8 @@ from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer


 class OnnxRVCInferencerNono(OnnxRVCInferencer):
-    def loadModel(self, file: str, gpu: int):
-        super().loadModel(file, gpu)
+    def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
+        super().loadModel(file, gpu, inferencerTypeVersion)
         self.setProps(EnumInferenceTypes.onnxRVCNono, file, self.isHalf, gpu)
         return self

@@ -39,4 +39,9 @@ class OnnxRVCInferencerNono(OnnxRVCInferencer):
             },
         )

-        return torch.tensor(np.array(audio1))
+        if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
+            res = audio1[0]
+        else:
+            res = np.array(audio1)[0][0, 0]
+        res = np.clip(res, -1.0, 1.0)
+        return torch.tensor(res)
@@ -35,4 +35,8 @@ class RVCInferencer(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
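This hunk and the five that follow move the same postprocessing (squeeze the nested model output to a 1-D float32 waveform, then clamp to [-1, 1]) into every PyTorch inferencer's infer() instead of the pipeline. A compact sketch of the shared pattern, assuming the model returns its usual nested [batch][1, 1, T] output (the helper name is illustrative):

import torch

def postprocess(raw) -> torch.Tensor:
    # raw: nested model output, indexed as [0][0, 0] -> 1-D waveform of T samples
    res = raw[0][0, 0].to(dtype=torch.float32)
    return torch.clip(res, -1.0, 1.0)  # clamp before the pipeline's int16 scaling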
@@ -35,4 +35,7 @@ class RVCInferencerNono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
@@ -34,4 +34,8 @@ class RVCInferencerv2(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
@@ -35,4 +35,7 @@ class RVCInferencerv2Nono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
@@ -35,4 +35,8 @@ class WebUIInferencer(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
@@ -35,4 +35,7 @@ class WebUIInferencerNono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
@@ -64,5 +64,7 @@ class SynthesizerTrnMs768NSFsid_ONNX(nn.Module):
         m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
         z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
         z = self.flow(z_p, x_mask, g=g, reverse=True)
-        o = self.dec.infer_realtime((z * x_mask)[:, :, :max_len], nsff0, g=g, convert_length=convert_length)
-        return o, x_mask, (z, z_p, m_p, logs_p)
+        # o = self.dec.infer_realtime((z * x_mask)[:, :, :max_len], nsff0, g=g, convert_length=convert_length)
+        o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
+        o = torch.clip(o[0, 0], -1.0, 1.0)
+        return o
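Baking the squeeze and clip into the exported forward is what lets the ONNX inferencers take audio1[0] directly for v1.1/v2.1 graphs: the graph now emits a single pre-clipped 1-D waveform instead of the (o, x_mask, (z, z_p, m_p, logs_p)) tuple. A toy illustration of the contract change (shapes assumed, not taken from the model):

import torch

dec_out = torch.randn(1, 1, 480)                  # decoder output: (batch, 1, T)
new_style = torch.clip(dec_out[0, 0], -1.0, 1.0)  # v2.1 export: 1-D, already in [-1, 1]
assert new_style.shape == (480,)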
@@ -37,7 +37,7 @@ def export2onnx(gpu: int, modelSlot: RVCModelSlot):
     output_path_simple = os.path.join(TMP_DIR, output_file_simple)
     metadata = {
         "application": "VC_CLIENT",
-        "version": "2",
+        "version": "2.1",
         "modelType": modelSlot.modelType,
         "samplingRate": modelSlot.samplingRate,
         "f0": modelSlot.f0,
@@ -118,14 +118,8 @@ class Pipeline(object):
         try:
             with torch.no_grad():
                 with autocast(enabled=self.isHalf):
-                    audio1 = (
-                        torch.clip(
-                            self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32),
-                            -1.0,
-                            1.0,
-                        )
-                        * 32767.5
-                    ).data.to(dtype=torch.int16)
+                    audio1 = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)
+                    audio1 = (audio1 * 32767.5).data.to(dtype=torch.int16)
             return audio1
         except RuntimeError as e:
             if "HALF" in e.__str__().upper():
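Because every inferencer now hands back a clipped float waveform in [-1, 1], the pipeline's only remaining job is scaling to 16-bit PCM. A quick check of the arithmetic (sample values illustrative):

import torch

audio = torch.tensor([-1.0, 0.0, 0.5, 1.0])    # clipped inferencer output
pcm = (audio * 32767.5).to(dtype=torch.int16)  # int conversion truncates toward zero
# -> tensor([-32767,      0,  16383,  32767], dtype=torch.int16)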
@@ -19,7 +19,7 @@ def createPipeline(params: VoiceChangerParams, modelSlot: RVCModelSlot, gpu: int
     # Create the Inferencer
     try:
         modelPath = os.path.join(params.model_dir, str(modelSlot.slotIndex), os.path.basename(modelSlot.modelFile))
-        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelPath, gpu)
+        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelPath, gpu, modelSlot.version)
     except Exception as e:
         print("[Voice Changer] exception! loading inferencer", e)
         traceback.print_exc()