voice-changer/server/voice_changer/RVC/inferencer/OnnxRVCInferencerNono.py
2023-05-03 13:14:00 +09:00

60 lines
1.8 KiB
Python

import torch
from torch import device
import onnxruntime
from const import EnumInferenceTypes
import numpy as np
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
# Execution providers handed to onnxruntime.InferenceSession when a model is
# loaded in this module; only the CPU provider is listed.
providers: list[str] = ["CPUExecutionProvider"]
class OnnxRVCInferencerNono(OnnxRVCInferencer):
    """ONNX Runtime inferencer for RVC models that take no pitch inputs.

    Only ``feats``, ``p_len`` and ``sid`` are fed to the session; the pitch
    arguments accepted by :meth:`infer` are ignored.
    """

    def loadModel(self, file: str, dev: device, isHalf: bool = True):
        """Create an ONNX Runtime session for ``file`` and detect its precision.

        Args:
            file: Path of the ONNX model to load.
            dev: Device forwarded to ``setProps``. The session itself is
                created with the module-level ``providers`` list.
            isHalf: Initial half-precision flag forwarded to ``setProps``.
                It is overwritten afterwards from the model's first input
                type.

        Returns:
            This inferencer instance, so calls can be chained.
        """
        super().setProps(EnumInferenceTypes.onnxRVC, file, dev, isHalf)

        # NOTE: thread tuning through SessionOptions is intentionally left
        # disabled, as in the original:
        # ort_options = onnxruntime.SessionOptions()
        # ort_options.intra_op_num_threads = 8

        # Load from the path the caller passed in. The previous code read
        # ``self.onnx_model``, which is never assigned in this class.
        onnx_session = onnxruntime.InferenceSession(file, providers=providers)

        # Check half-precision on the session that was just created. The
        # previous code read ``self.onnx_session``, which is never assigned
        # in this class. A first input of type "tensor(float)" means the
        # model expects float32; anything else is treated as half precision.
        first_input_type = onnx_session.get_inputs()[0].type
        self.isHalf = first_input_type != "tensor(float)"

        self.model = onnx_session
        return self

    def infer(
        self,
        feats: torch.Tensor,
        pitch_length: torch.Tensor,
        pitch: torch.Tensor | None,
        pitchf: torch.Tensor | None,
        sid: torch.Tensor,
    ) -> torch.Tensor:
        """Run the loaded model and return its ``audio`` output as a tensor.

        Args:
            feats: Feature tensor; cast to float16 when ``self.isHalf`` is
                set, otherwise to float32.
            pitch_length: Fed to the model as ``p_len`` after a cast to int64.
            pitch: Unused; accepted for interface compatibility.
            pitchf: Unused; accepted for interface compatibility.
            sid: Fed to the model as ``sid`` after a cast to int64.

        Returns:
            A tensor built from the list returned by the session's ``run``
            call for the single requested output ``"audio"``.
        """
        # The two precision modes differ only in the dtype of ``feats``.
        feats_dtype = np.float16 if self.isHalf else np.float32

        outputs = self.model.run(
            ["audio"],
            {
                "feats": feats.cpu().numpy().astype(feats_dtype),
                "p_len": pitch_length.cpu().numpy().astype(np.int64),
                "sid": sid.cpu().numpy().astype(np.int64),
            },
        )

        # ``run`` returns a list of outputs; wrapping it in np.array keeps
        # the same stacked layout the original code produced.
        return torch.tensor(np.array(outputs))