voice-changer/server/voice_changer/RVC/ModelWrapper.py

131 lines
4.5 KiB
Python
Raw Normal View History

2023-04-07 21:11:37 +03:00
import onnxruntime
import torch
import numpy as np
import json
2023-04-28 02:46:34 +03:00
2023-04-07 21:11:37 +03:00
# Execution providers passed to onnxruntime.InferenceSession when a
# ModelWrapper is constructed. Only the CPU provider is enabled here; the
# commented-out line below keeps a longer candidate list for reference.
# providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
providers = ["CPUExecutionProvider"]
class ModelWrapper:
    """Wrap an ONNX Runtime inference session and the metadata stored in it.

    On construction the wrapper opens an ``onnxruntime.InferenceSession``,
    decides from the first model input whether features must be fed as
    float16 or float32, and reads the JSON document stored under the
    ``"metadata"`` key of the model's custom metadata map. When that
    document is missing or incomplete, fixed defaults are used and the
    wrapper is flagged as deprecated.
    """

    def __init__(self, onnx_model):
        """Open the session and load (or default) the model metadata.

        Args:
            onnx_model: Forwarded unchanged as the first argument of
                ``onnxruntime.InferenceSession``.
        """
        self.onnx_model = onnx_model
        self.onnx_session = onnxruntime.InferenceSession(
            self.onnx_model, providers=providers
        )

        # Any first-input type other than "tensor(float)" is treated as a
        # half-precision model.
        first_input_type = self.onnx_session.get_inputs()[0].type
        self.is_half = first_input_type != "tensor(float)"

        modelmeta = self.onnx_session.get_modelmeta()
        try:
            metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
            self.samplingRate = metadata["samplingRate"]
            self.f0 = metadata["f0"]
            self.embChannels = metadata["embChannels"]
            self.modelType = metadata["modelType"]
            self.deprecated = False
            # "embedder" is optional in the metadata document.
            self.embedder = metadata.get("embedder", "hubert_base")
        except (KeyError, TypeError, ValueError):
            # KeyError: no "metadata" entry or a required field is missing.
            # ValueError: the entry is not valid JSON (JSONDecodeError).
            # TypeError: the decoded JSON is not a mapping.
            # Every attribute is reset so a partially read document cannot
            # leak values into the fallback configuration.
            self.samplingRate = 48000
            self.f0 = True
            self.embChannels = 256
            self.modelType = 0
            self.deprecated = True
            self.embedder = "hubert_base"
            print(
                "[Voice Changer] ############## !!!! CAUTION !!!! ####################"
            )
            print(
                "[Voice Changer] This onnx's version is depricated. Please regenerate onnxfile. Fallback to default"
            )
            print(
                f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}"
            )
            print(
                "[Voice Changer] ############## !!!! CAUTION !!!! ####################"
            )
        else:
            print(
                f"[Voice Changer] Onnx metadata: sr:{self.samplingRate}, f0:{self.f0}, embedder:{self.embedder}"
            )

    def getSamplingRate(self):
        """Return the ``samplingRate`` metadata value (48000 on fallback)."""
        return self.samplingRate

    def getF0(self):
        """Return the ``f0`` metadata value (True on fallback)."""
        return self.f0

    def getEmbChannels(self):
        """Return the ``embChannels`` metadata value (256 on fallback)."""
        return self.embChannels

    def getModelType(self):
        """Return the ``modelType`` metadata value (0 on fallback)."""
        return self.modelType

    def getDeprecated(self):
        """Return True when the metadata could not be read and defaults are in use."""
        return self.deprecated

    def getEmbedder(self):
        """Return the embedder name ("hubert_base" when not specified)."""
        return self.embedder

    def set_providers(self, providers, provider_options=None):
        """Replace the execution providers of the underlying session.

        Args:
            providers: Provider list forwarded to
                ``InferenceSession.set_providers``.
            provider_options: Matching list of option dicts. When omitted,
                a fresh ``[{}]`` is used, the same value as the previous
                default but no longer a list shared between calls.
        """
        if provider_options is None:
            provider_options = [{}]
        self.onnx_session.set_providers(
            providers=providers, provider_options=provider_options
        )

    def get_providers(self):
        """Return the providers reported by the underlying session."""
        return self.onnx_session.get_providers()

    def _feats_dtype(self):
        """Return the numpy dtype used for the ``feats`` input."""
        return np.float16 if self.is_half else np.float32

    def _run_audio(self, inputs):
        """Run the session for the "audio" output and return it as a tensor.

        The list returned by ``run`` is converted with ``np.array`` before
        being handed to ``torch.tensor``, exactly as the original code did.
        """
        audio1 = self.onnx_session.run(["audio"], inputs)
        return torch.tensor(np.array(audio1))

    def infer_pitchless(self, feats, p_len, sid):
        """Run inference without pitch inputs.

        Each argument must support ``.cpu().numpy()`` (e.g. a torch tensor).

        Args:
            feats: Cast to float16 for half models, float32 otherwise.
            p_len: Cast to int64.
            sid: Cast to int64.

        Returns:
            ``torch.Tensor`` built from the session's "audio" output.
        """
        return self._run_audio(
            {
                "feats": feats.cpu().numpy().astype(self._feats_dtype()),
                "p_len": p_len.cpu().numpy().astype(np.int64),
                "sid": sid.cpu().numpy().astype(np.int64),
            }
        )

    def infer(self, feats, p_len, pitch, pitchf, sid):
        """Run inference with pitch inputs.

        Each argument must support ``.cpu().numpy()`` (e.g. a torch tensor).

        Args:
            feats: Cast to float16 for half models, float32 otherwise.
            p_len: Cast to int64.
            pitch: Cast to int64.
            pitchf: Cast to float32 for both half and full precision models.
            sid: Cast to int64.

        Returns:
            ``torch.Tensor`` built from the session's "audio" output.
        """
        return self._run_audio(
            {
                "feats": feats.cpu().numpy().astype(self._feats_dtype()),
                "p_len": p_len.cpu().numpy().astype(np.int64),
                "pitch": pitch.cpu().numpy().astype(np.int64),
                # pitchf stays float32 even when the model is half precision.
                "pitchf": pitchf.cpu().numpy().astype(np.float32),
                "sid": sid.cpu().numpy().astype(np.int64),
            }
        )