voice-changer/server/voice_changer/RVC/ModelWrapper.py

60 lines
2.2 KiB
Python
Raw Normal View History

2023-04-07 21:11:37 +03:00
import onnxruntime
import torch
import numpy as np
# Execution providers handed to onnxruntime.InferenceSession when a
# ModelWrapper is constructed. Only the CPU provider is enabled here; a wider
# list that was previously considered is kept below for reference:
#   ['OpenVINOExecutionProvider', "CUDAExecutionProvider",
#    "DmlExecutionProvider", "CPUExecutionProvider"]
providers = ["CPUExecutionProvider"]
class ModelWrapper:
    """Wrap an ONNX Runtime inference session for a voice-conversion model.

    The wrapper creates the session, records whether the model's first input
    is something other than a float32 tensor (treated as "half precision"),
    and exposes a single ``infer`` call that converts torch tensors into the
    numpy arrays the session is fed with.
    """

    def __init__(self, onnx_model):
        """Create the inference session for ``onnx_model``.

        Args:
            onnx_model: Model argument forwarded unchanged as the first
                argument of ``onnxruntime.InferenceSession``.
        """
        self.onnx_model = onnx_model
        # The session starts with the module-level ``providers`` list; use
        # ``set_providers`` afterwards to switch to another backend.
        self.onnx_session = onnxruntime.InferenceSession(
            self.onnx_model,
            providers=providers,
        )
        # Any first-input type other than float32 is treated as half
        # precision, which selects the float16 cast for ``feats`` in ``infer``.
        first_input_type = self.onnx_session.get_inputs()[0].type
        self.is_half = first_input_type != "tensor(float)"

    def set_providers(self, providers, provider_options=None):
        """Replace the execution providers used by the session.

        Args:
            providers: Provider names, forwarded to the session.
            provider_options: Optional per-provider option dicts, forwarded
                to the session. Defaults to ``None`` so that ONNX Runtime
                applies its own defaults for every provider. The previous
                default was a shared mutable ``[{}]`` whose length of one
                did not match a call with several providers.
        """
        self.onnx_session.set_providers(
            providers=providers,
            provider_options=provider_options,
        )

    def get_providers(self):
        """Return the providers reported by the underlying session."""
        return self.onnx_session.get_providers()

    def infer(self, feats, p_len, pitch, pitchf, sid):
        """Run the model and return its ``"audio"`` output as a torch tensor.

        Every argument must support ``.cpu().numpy()`` (e.g. a torch tensor).
        ``feats`` is cast to float16 when ``self.is_half`` is true and to
        float32 otherwise; ``pitchf`` is always float32; ``p_len``, ``pitch``
        and ``sid`` are always int64.

        Returns:
            ``torch.tensor`` built from the list of outputs returned by the
            session, so the result carries one extra leading dimension for
            that list.
        """
        # Only the dtype of ``feats`` depends on the model's precision.
        feats_dtype = np.float16 if self.is_half else np.float32
        feeds = {
            "feats": feats.cpu().numpy().astype(feats_dtype),
            "p_len": p_len.cpu().numpy().astype(np.int64),
            "pitch": pitch.cpu().numpy().astype(np.int64),
            "pitchf": pitchf.cpu().numpy().astype(np.float32),
            "sid": sid.cpu().numpy().astype(np.int64),
        }
        audio1 = self.onnx_session.run(["audio"], feeds)
        return torch.tensor(np.array(audio1))