Mirror of https://github.com/w-okada/voice-changer.git
Synced 2025-01-23 21:45:00 +03:00

Commit 80a5ba91b8 (parent: 93480636a3)

    WIP onnx improve

The diff threads an inferencerTypeVersion through model loading, moves output
clipping out of the pipeline into the inferencers (and, for new exports, into
the ONNX graph itself), and bumps the export metadata version to "2.1".
@@ -40,6 +40,8 @@ class RVCModelSlot(ModelSlot):
     sampleId: str = ""
     speakers: dict = field(default_factory=lambda: {0: "target"})
 
+    version:str = "v2"
+
 
 @dataclass
 class MMVCv13ModelSlot(ModelSlot):

@@ -154,6 +154,16 @@ class RVCModelSlotGenerator(ModelSlotGenerator):
             slot.samplingRate = metadata["samplingRate"]
             slot.deprecated = False
 
+            if slot.embChannels == 256:
+                if metadata["version"] == "2.1":
+                    slot.version = "v1.1"  # 1.1 performs the clip inside the ONNX graph; realtime is disabled
+                else:
+                    slot.version = "v1"
+            elif metadata["version"] == "2":
+                slot.version = "v2"
+            elif metadata["version"] == "2.1":  # 2.1 performs the clip inside the ONNX graph; realtime is disabled
+                slot.version = "v2.1"
+
         except Exception as e:
             slot.modelType = EnumInferenceTypes.onnxRVC.value
             slot.embChannels = 256

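Note: the version-detection rule added above can be read as one small pure
function. The sketch below restates it; resolve_onnx_version is an
illustrative name, and the final fallback to "v2" (the dataclass default from
the first hunk) is an assumption, since the diff shows no else branch.

    def resolve_onnx_version(emb_channels: int, metadata_version: str) -> str:
        # 256 embedder channels marks a v1-family model; metadata "2.1" means
        # the clip is baked into the ONNX graph (realtime gets disabled).
        if emb_channels == 256:
            return "v1.1" if metadata_version == "2.1" else "v1"
        if metadata_version == "2":
            return "v2"
        if metadata_version == "2.1":
            return "v2.1"
        return "v2"  # assumed fallback: slot.version keeps its default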
@@ -20,8 +20,9 @@ class InferencerManager:
         inferencerType: EnumInferenceTypes,
         file: str,
         gpu: int,
+        inferencerTypeVersion: str | None = None,
     ) -> Inferencer:
-        cls.currentInferencer = cls.loadInferencer(inferencerType, file, gpu)
+        cls.currentInferencer = cls.loadInferencer(inferencerType, file, gpu, inferencerTypeVersion)
         return cls.currentInferencer
 
     @classmethod

@@ -30,6 +31,7 @@ class InferencerManager:
         inferencerType: EnumInferenceTypes,
         file: str,
         gpu: int,
+        inferencerTypeVersion: str | None = None,
     ) -> Inferencer:
         if inferencerType == EnumInferenceTypes.pyTorchRVC or inferencerType == EnumInferenceTypes.pyTorchRVC.value:
             return RVCInferencer().loadModel(file, gpu)

@@ -50,8 +52,8 @@ class InferencerManager:
         elif inferencerType == EnumInferenceTypes.pyTorchWebUINono or inferencerType == EnumInferenceTypes.pyTorchWebUINono.value:
             return WebUIInferencerNono().loadModel(file, gpu)
         elif inferencerType == EnumInferenceTypes.onnxRVC or inferencerType == EnumInferenceTypes.onnxRVC.value:
-            return OnnxRVCInferencer().loadModel(file, gpu)
+            return OnnxRVCInferencer().loadModel(file, gpu, inferencerTypeVersion)
         elif inferencerType == EnumInferenceTypes.onnxRVCNono or inferencerType == EnumInferenceTypes.onnxRVCNono.value:
-            return OnnxRVCInferencerNono().loadModel(file, gpu)
+            return OnnxRVCInferencerNono().loadModel(file, gpu, inferencerTypeVersion)
         else:
             raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)

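For context, a hypothetical call site for the widened signature could look
like this (the file path and version string are placeholders, not values from
the commit):

    inferencer = InferencerManager.getInferencer(
        EnumInferenceTypes.onnxRVC,  # inferencerType
        "/models/0/model.onnx",      # file (placeholder path)
        0,                           # gpu
        "v2.1",                      # inferencerTypeVersion, newly threaded through
    )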
@@ -7,7 +7,7 @@ import numpy as np
 
 
 class OnnxRVCInferencer(Inferencer):
-    def loadModel(self, file: str, gpu: int):
+    def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
         self.setProps(EnumInferenceTypes.onnxRVC, file, True, gpu)
         (
             onnxProviders,

@@ -26,6 +26,9 @@ class OnnxRVCInferencer(Inferencer):
         self.isHalf = True
 
         self.model = onnx_session
+
+        self.inferencerTypeVersion = inferencerTypeVersion
+
         return self
 
     def infer(

@@ -66,7 +69,14 @@ class OnnxRVCInferencer(Inferencer):
             },
         )
 
-        return torch.tensor(np.array(audio1))
+        if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
+            res = audio1[0]
+        else:
+            res = np.array(audio1)[0][0, 0]
+        res = np.clip(res, -1.0, 1.0)
+        return torch.tensor(res)
+
+        # return torch.tensor(np.array(audio1))
 
     def getInferencerInfo(self):
         inferencer = super().getInferencerInfo()

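The new branch encodes a shape difference between exports: pre-2.1 graphs
return the audio as a [1, 1, T] array inside the ONNX output list, while
v1.1/v2.1 graphs emit an already clipped 1-D waveform as the first output.
A standalone sketch of that postprocessing, with the shapes inferred from the
indexing rather than stated in the commit:

    import numpy as np
    import torch

    def postprocess_onnx_output(audio1, version: str | None) -> torch.Tensor:
        if version in ("v2.1", "v1.1"):
            res = audio1[0]                  # already a 1-D waveform
        else:
            res = np.array(audio1)[0][0, 0]  # unpack output list, batch, channel
        res = np.clip(res, -1.0, 1.0)        # defensive clamp either way
        return torch.tensor(res)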
@@ -6,8 +6,8 @@ from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
 
 
 class OnnxRVCInferencerNono(OnnxRVCInferencer):
-    def loadModel(self, file: str, gpu: int):
-        super().loadModel(file, gpu)
+    def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
+        super().loadModel(file, gpu, inferencerTypeVersion)
         self.setProps(EnumInferenceTypes.onnxRVCNono, file, self.isHalf, gpu)
         return self
 

@@ -39,4 +39,9 @@ class OnnxRVCInferencerNono(OnnxRVCInferencer):
             },
         )
 
-        return torch.tensor(np.array(audio1))
+        if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
+            res = audio1[0]
+        else:
+            res = np.array(audio1)[0][0, 0]
+        res = np.clip(res, -1.0, 1.0)
+        return torch.tensor(res)

@@ -35,4 +35,8 @@ class RVCInferencer(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
+

@@ -35,4 +35,7 @@ class RVCInferencerNono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res

@@ -34,4 +34,8 @@ class RVCInferencerv2(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
+

@@ -35,4 +35,7 @@ class RVCInferencerv2Nono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res

@@ -35,4 +35,8 @@ class WebUIInferencer(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
+

@@ -35,4 +35,7 @@ class WebUIInferencerNono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res

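The same three-line epilogue now appears in all six PyTorch inferencers
(RVCInferencer, RVCInferencerNono, RVCInferencerv2, RVCInferencerv2Nono,
WebUIInferencer, WebUIInferencerNono). A shared helper, purely hypothetical
and not part of this commit, would capture it once:

    import torch

    def finalize_waveform(res) -> torch.Tensor:
        # Take the first output tensor, drop the batch and channel dims, and
        # clamp to the valid float-PCM range before returning the audio.
        res = res[0][0, 0].to(dtype=torch.float32)
        return torch.clip(res, -1.0, 1.0)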
@@ -64,5 +64,7 @@ class SynthesizerTrnMs768NSFsid_ONNX(nn.Module):
         m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
         z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
         z = self.flow(z_p, x_mask, g=g, reverse=True)
-        o = self.dec.infer_realtime((z * x_mask)[:, :, :max_len], nsff0, g=g, convert_length=convert_length)
-        return o, x_mask, (z, z_p, m_p, logs_p)
+        # o = self.dec.infer_realtime((z * x_mask)[:, :, :max_len], nsff0, g=g, convert_length=convert_length)
+        o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
+        o = torch.clip(o[0, 0], -1.0, 1.0)
+        return o

@@ -37,7 +37,7 @@ def export2onnx(gpu: int, modelSlot: RVCModelSlot):
     output_path_simple = os.path.join(TMP_DIR, output_file_simple)
     metadata = {
         "application": "VC_CLIENT",
-        "version": "2",
+        "version": "2.1",
         "modelType": modelSlot.modelType,
         "samplingRate": modelSlot.samplingRate,
         "f0": modelSlot.f0,

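Bumping the embedded metadata version to "2.1" is what the loader change in
the earlier RVCModelSlotGenerator hunk keys on: re-exported models advertise
that clipping happens inside the graph, which maps to slot.version "v2.1" (or
"v1.1" for 256-channel embedders). The resulting metadata looks roughly like
this, with the slot-derived values below being placeholders:

    metadata = {
        "application": "VC_CLIENT",
        "version": "2.1",        # clip baked into the exported graph
        "modelType": "onnxRVC",  # placeholder; taken from modelSlot.modelType
        "samplingRate": 40000,   # placeholder; taken from modelSlot.samplingRate
        "f0": True,              # placeholder; taken from modelSlot.f0
    }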
@@ -118,14 +118,8 @@ class Pipeline(object):
         try:
             with torch.no_grad():
                 with autocast(enabled=self.isHalf):
-                    audio1 = (
-                        torch.clip(
-                            self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32),
-                            -1.0,
-                            1.0,
-                        )
-                        * 32767.5
-                    ).data.to(dtype=torch.int16)
+                    audio1 = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)
+                    audio1 = (audio1 * 32767.5).data.to(dtype=torch.int16)
             return audio1
         except RuntimeError as e:
             if "HALF" in e.__str__().upper():

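With clipping moved into the inferencers (or into the exported ONNX graph
itself), the pipeline keeps only the float-to-PCM16 scaling. A minimal sketch
of that remaining step, assuming the inferencer returns a clipped float
tensor in [-1.0, 1.0]:

    import torch

    audio_float = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
    audio_int16 = (audio_float * 32767.5).to(dtype=torch.int16)
    # tensor([-32767, -16383,      0,  16383,  32767], dtype=torch.int16)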
@@ -19,7 +19,7 @@ def createPipeline(params: VoiceChangerParams, modelSlot: RVCModelSlot, gpu: int
     # Create the Inferencer
     try:
        modelPath = os.path.join(params.model_dir, str(modelSlot.slotIndex), os.path.basename(modelSlot.modelFile))
-        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelPath, gpu)
+        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelPath, gpu, modelSlot.version)
     except Exception as e:
         print("[Voice Changer] exception! loading inferencer", e)
         traceback.print_exc()