WIP onnx improve

w-okada 2023-09-06 08:04:39 +09:00
parent 93480636a3
commit 80a5ba91b8
15 changed files with 72 additions and 26 deletions


@@ -40,6 +40,8 @@ class RVCModelSlot(ModelSlot):
     sampleId: str = ""
     speakers: dict = field(default_factory=lambda: {0: "target"})
+    version:str = "v2"
+
 
 
 @dataclass
 class MMVCv13ModelSlot(ModelSlot):


@@ -154,6 +154,16 @@ class RVCModelSlotGenerator(ModelSlotGenerator):
             slot.samplingRate = metadata["samplingRate"]
             slot.deprecated = False
+
+            if slot.embChannels == 256:
+                if metadata["version"] == "2.1":
+                    slot.version = "v1.1"  # 1.1 performs the clip inside the ONNX model; realtime is disabled
+                else:
+                    slot.version = "v1"
+            elif metadata["version"] == "2":
+                slot.version = "v2"
+            elif metadata["version"] == "2.1":  # 2.1 performs the clip inside the ONNX model; realtime is disabled
+                slot.version = "v2.1"
         except Exception as e:
             slot.modelType = EnumInferenceTypes.onnxRVC.value
             slot.embChannels = 256
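
The branch above maps the ONNX metadata onto the new slot.version field: 256 embedding channels means a v1 model ("v1.1" when the metadata version is "2.1", i.e. the clip is baked into the graph), otherwise metadata version "2" maps to "v2" and "2.1" to "v2.1"; any other combination leaves the dataclass default "v2" from the previous hunk in place. A minimal stand-alone sketch of that mapping (the helper name is illustrative, not part of the repository):

    def resolve_slot_version(emb_channels: int, metadata_version: str) -> str | None:
        # Illustrative helper mirroring the branch in RVCModelSlotGenerator above.
        if emb_channels == 256:
            return "v1.1" if metadata_version == "2.1" else "v1"
        elif metadata_version == "2":
            return "v2"
        elif metadata_version == "2.1":
            return "v2.1"
        return None  # slot.version keeps its default ("v2") for other combinations

    assert resolve_slot_version(256, "2.1") == "v1.1"
    assert resolve_slot_version(768, "2") == "v2"
    assert resolve_slot_version(768, "2.1") == "v2.1"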


@@ -20,8 +20,9 @@ class InferencerManager:
         inferencerType: EnumInferenceTypes,
         file: str,
         gpu: int,
+        inferencerTypeVersion: str | None = None,
     ) -> Inferencer:
-        cls.currentInferencer = cls.loadInferencer(inferencerType, file, gpu)
+        cls.currentInferencer = cls.loadInferencer(inferencerType, file, gpu, inferencerTypeVersion)
         return cls.currentInferencer
 
     @classmethod
@@ -30,6 +31,7 @@ class InferencerManager:
         inferencerType: EnumInferenceTypes,
         file: str,
         gpu: int,
+        inferencerTypeVersion: str | None = None,
     ) -> Inferencer:
         if inferencerType == EnumInferenceTypes.pyTorchRVC or inferencerType == EnumInferenceTypes.pyTorchRVC.value:
             return RVCInferencer().loadModel(file, gpu)
@@ -50,8 +52,8 @@ class InferencerManager:
         elif inferencerType == EnumInferenceTypes.pyTorchWebUINono or inferencerType == EnumInferenceTypes.pyTorchWebUINono.value:
             return WebUIInferencerNono().loadModel(file, gpu)
         elif inferencerType == EnumInferenceTypes.onnxRVC or inferencerType == EnumInferenceTypes.onnxRVC.value:
-            return OnnxRVCInferencer().loadModel(file, gpu)
+            return OnnxRVCInferencer().loadModel(file, gpu, inferencerTypeVersion)
         elif inferencerType == EnumInferenceTypes.onnxRVCNono or inferencerType == EnumInferenceTypes.onnxRVCNono.value:
-            return OnnxRVCInferencerNono().loadModel(file, gpu)
+            return OnnxRVCInferencerNono().loadModel(file, gpu, inferencerTypeVersion)
         else:
             raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)
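
The version is threaded through InferencerManager as a trailing parameter with a None default, so existing call sites stay valid and, per the dispatch above, only the ONNX inferencers actually receive it. A minimal sketch of that pattern, independent of the project classes:

    class DummyLoader:
        # Stand-in illustrating the signature change, not the project class.
        def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
            self.inferencerTypeVersion = inferencerTypeVersion
            return self

    legacy = DummyLoader().loadModel("model.onnx", 0)            # older call sites still work
    updated = DummyLoader().loadModel("model.onnx", 0, "v2.1")   # new path threads the version
    assert legacy.inferencerTypeVersion is None
    assert updated.inferencerTypeVersion == "v2.1"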


@@ -7,7 +7,7 @@ import numpy as np
 
 
 class OnnxRVCInferencer(Inferencer):
-    def loadModel(self, file: str, gpu: int):
+    def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
         self.setProps(EnumInferenceTypes.onnxRVC, file, True, gpu)
         (
             onnxProviders,
@@ -26,6 +26,9 @@ class OnnxRVCInferencer(Inferencer):
             self.isHalf = True
 
         self.model = onnx_session
+
+        self.inferencerTypeVersion = inferencerTypeVersion
+
         return self
 
     def infer(
@@ -66,7 +69,14 @@ class OnnxRVCInferencer(Inferencer):
             },
         )
 
-        return torch.tensor(np.array(audio1))
+        if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
+            res = audio1[0]
+        else:
+            res = np.array(audio1)[0][0, 0]
+            res = np.clip(res, -1.0, 1.0)
+        return torch.tensor(res)
+        # return torch.tensor(np.array(audio1))
 
     def getInferencerInfo(self):
         inferencer = super().getInferencerInfo()
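
The output handling in OnnxRVCInferencer.infer now depends on the exported graph: for "v2.1"/"v1.1" models the ONNX session appears to return an already clipped 1-D waveform as its first output, while older exports return a [1, 1, T] tensor that still needs indexing and clipping on the Python side. A self-contained sketch of the two branches on dummy arrays (the shapes are assumptions inferred from the diff, not repository code):

    import numpy as np
    import torch

    def postprocess(audio1, version: str | None) -> torch.Tensor:
        # Mirrors the branch added above; shapes are assumptions, not repo code.
        if version == "v2.1" or version == "v1.1":
            res = audio1[0]                      # newer graphs: already clipped 1-D waveform
        else:
            res = np.array(audio1)[0][0, 0]      # older graphs: [1, 1, T] -> T samples
            res = np.clip(res, -1.0, 1.0)        # clip here because the graph did not
        return torch.tensor(res)

    old_out = [np.random.uniform(-2, 2, (1, 1, 16000)).astype(np.float32)]
    new_out = [np.random.uniform(-1, 1, 16000).astype(np.float32)]
    assert postprocess(old_out, None).shape == postprocess(new_out, "v2.1").shape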


@@ -6,8 +6,8 @@ from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
 
 
 class OnnxRVCInferencerNono(OnnxRVCInferencer):
-    def loadModel(self, file: str, gpu: int):
-        super().loadModel(file, gpu)
+    def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
+        super().loadModel(file, gpu, inferencerTypeVersion)
         self.setProps(EnumInferenceTypes.onnxRVCNono, file, self.isHalf, gpu)
         return self
 
@@ -39,4 +39,9 @@ class OnnxRVCInferencerNono(OnnxRVCInferencer):
             },
         )
 
-        return torch.tensor(np.array(audio1))
+        if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
+            res = audio1[0]
+        else:
+            res = np.array(audio1)[0][0, 0]
+            res = np.clip(res, -1.0, 1.0)
+        return torch.tensor(res)


@@ -35,4 +35,8 @@ class RVCInferencer(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res


@@ -35,4 +35,7 @@ class RVCInferencerNono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res


@@ -34,4 +34,8 @@ class RVCInferencerv2(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res


@@ -35,4 +35,7 @@ class RVCInferencerv2Nono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res


@@ -35,4 +35,8 @@ class WebUIInferencer(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, pitch, pitchf, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res


@@ -35,4 +35,7 @@ class WebUIInferencerNono(Inferencer):
         sid: torch.Tensor,
         convert_length: int | None,
     ) -> torch.Tensor:
-        return self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = self.model.infer(feats, pitch_length, sid, convert_length=convert_length)
+        res = res[0][0, 0].to(dtype=torch.float32)
+        res = torch.clip(res, -1.0, 1.0)
+        return res
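
Each of the PyTorch inferencers above (RVC, RVC v2, WebUI, and their nono variants) now ends with the same tail: take the first element of the synthesizer output, index out the [0, 0] waveform, cast to float32, and clip to [-1, 1]. A hypothetical shared helper (not present in the repository) that captures the repeated tail:

    import torch

    def to_waveform(raw) -> torch.Tensor:
        # raw is assumed to be (audio, ...) with audio shaped [1, 1, T], as in the hunks above.
        res = raw[0][0, 0].to(dtype=torch.float32)
        return torch.clip(res, -1.0, 1.0)

    dummy = (torch.randn(1, 1, 16000) * 2.0,)    # stand-in for self.model.infer(...)
    print(to_waveform(dummy).shape)              # torch.Size([16000]), values within [-1, 1]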


@@ -64,5 +64,7 @@ class SynthesizerTrnMs768NSFsid_ONNX(nn.Module):
         m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
         z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
         z = self.flow(z_p, x_mask, g=g, reverse=True)
-        o = self.dec.infer_realtime((z * x_mask)[:, :, :max_len], nsff0, g=g, convert_length=convert_length)
-        return o, x_mask, (z, z_p, m_p, logs_p)
+        # o = self.dec.infer_realtime((z * x_mask)[:, :, :max_len], nsff0, g=g, convert_length=convert_length)
+        o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
+        o = torch.clip(o[0, 0], -1.0, 1.0)
+        return o
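
With infer_realtime commented out, the export-time forward of SynthesizerTrnMs768NSFsid_ONNX runs the full decoder and returns a single clipped 1-D waveform instead of the (o, x_mask, (z, z_p, m_p, logs_p)) tuple, so the clip ends up inside the exported graph. A toy export sketch showing the same output contract (TinyDecoder is a stand-in, not the real synthesizer):

    import torch
    import torch.nn as nn

    class TinyDecoder(nn.Module):
        # Stand-in module: the forward itself indexes and clips, so the graph produced
        # by torch.onnx.export emits a ready-to-use 1-D waveform.
        def forward(self, z: torch.Tensor) -> torch.Tensor:
            o = z * 1.5                          # placeholder for the real decoder
            return torch.clip(o[0, 0], -1.0, 1.0)

    model = TinyDecoder().eval()
    dummy = torch.randn(1, 1, 16000)
    torch.onnx.export(model, (dummy,), "tiny_decoder.onnx", input_names=["z"], output_names=["audio"])
    print(model(dummy).shape)                    # torch.Size([16000])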


@@ -37,7 +37,7 @@ def export2onnx(gpu: int, modelSlot: RVCModelSlot):
     output_path_simple = os.path.join(TMP_DIR, output_file_simple)
     metadata = {
         "application": "VC_CLIENT",
-        "version": "2",
+        "version": "2.1",
         "modelType": modelSlot.modelType,
         "samplingRate": modelSlot.samplingRate,
         "f0": modelSlot.f0,


@@ -118,14 +118,8 @@ class Pipeline(object):
         try:
             with torch.no_grad():
                 with autocast(enabled=self.isHalf):
-                    audio1 = (
-                        torch.clip(
-                            self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32),
-                            -1.0,
-                            1.0,
-                        )
-                        * 32767.5
-                    ).data.to(dtype=torch.int16)
+                    audio1 = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)
+                    audio1 = (audio1 * 32767.5).data.to(dtype=torch.int16)
             return audio1
         except RuntimeError as e:
             if "HALF" in e.__str__().upper():


@@ -19,7 +19,7 @@ def createPipeline(params: VoiceChangerParams, modelSlot: RVCModelSlot, gpu: int
     # Inferencer generation
     try:
         modelPath = os.path.join(params.model_dir, str(modelSlot.slotIndex), os.path.basename(modelSlot.modelFile))
-        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelPath, gpu)
+        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelPath, gpu, modelSlot.version)
     except Exception as e:
         print("[Voice Changer] exception! loading inferencer", e)
         traceback.print_exc()