voice-changer/server/voice_changer/DiffusionSVC/onnxExporter/export2onnx.py
2023-07-17 07:21:06 +09:00

126 lines
4.3 KiB
Python

import os
import json
import torch
from onnxsim import simplify
import onnx
from const import TMP_DIR, EnumInferenceTypes
from data.ModelSlot import DiffusionSVCModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
def export2onnx(gpu: int, modelSlot: DiffusionSVCModelSlot):
    """Export the slot's PyTorch model to ONNX files under TMP_DIR.

    Exports in fp16 when the selected GPU reports memory (CUDA available),
    otherwise falls back to fp32. Returns the file name (not the full path)
    of the simplified ONNX model.
    """
    base_name = os.path.splitext(os.path.basename(modelSlot.modelFile))[0]
    raw_name = base_name + ".onnx"
    simple_name = base_name + "_simple.onnx"
    raw_path = os.path.join(TMP_DIR, raw_name)
    simple_path = os.path.join(TMP_DIR, simple_name)

    # Metadata embedded into the ONNX file; enum-like fields are kept as
    # plain values so the dict stays JSON-serializable.
    metadata = {
        "application": "VC_CLIENT",
        "version": "3",
        "voiceChangerType": modelSlot.voiceChangerType,
        "modelType": modelSlot.modelType,
        "samplingRate": modelSlot.samplingRate,
        "embChannels": modelSlot.embChannels,
        "embedder": modelSlot.embedder,
    }

    gpu_memory = DeviceManager.get_instance().getDeviceMemory(gpu)
    print(f"[Voice Changer] exporting onnx... gpu_id:{gpu} gpu_mem:{gpu_memory}")

    use_half = gpu_memory > 0
    if not use_half:
        print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
    _export2onnx(modelSlot.modelFile, raw_path, simple_path, use_half, metadata)
    return simple_name
def _export2onnx(input_model, output_model, output_model_simple, is_half, metadata):
    """Trace an RVC-style synthesizer checkpoint to ONNX and simplify it.

    Args:
        input_model: path to a torch checkpoint containing "config"/"params"
            (constructor arguments) and "weight" (state dict).
        output_model: path for the raw ONNX export.
        output_model_simple: path for the onnxsim-simplified ONNX file.
        is_half: export with fp16 weights and inputs (uses CUDA device 0);
            fp32 on CPU otherwise.
        metadata: dict stored in the ONNX file's metadata_props. "modelType"
            selects the network class, "embChannels" sizes the feature input,
            and "f0" (if present and True) selects the pitched input signature.

    Raises:
        RuntimeError: if metadata["modelType"] matches no known model class.
    """
    cpt = torch.load(input_model, map_location="cpu")
    dev = torch.device("cuda", index=0) if is_half else torch.device("cpu")

    # EnumInferenceTypes members are compared by .value because the enum
    # itself cannot be serialized into the JSON metadata.
    model_type = metadata["modelType"]
    if model_type == EnumInferenceTypes.pyTorchRVC.value:
        net_g_onnx = SynthesizerTrnMs256NSFsid_ONNX(*cpt["config"], is_half=is_half)
    elif model_type == EnumInferenceTypes.pyTorchWebUI.value:
        net_g_onnx = SynthesizerTrnMsNSFsid_webui_ONNX(**cpt["params"], is_half=is_half)
    elif model_type == EnumInferenceTypes.pyTorchRVCNono.value:
        net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"])
    elif model_type == EnumInferenceTypes.pyTorchWebUINono.value:
        net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"])
    elif model_type == EnumInferenceTypes.pyTorchRVCv2.value:
        net_g_onnx = SynthesizerTrnMs768NSFsid_ONNX(*cpt["config"], is_half=is_half)
    elif model_type == EnumInferenceTypes.pyTorchRVCv2Nono.value:
        net_g_onnx = SynthesizerTrnMs768NSFsid_nono_ONNX(*cpt["config"])
    else:
        # The original fell through and crashed with UnboundLocalError on
        # net_g_onnx below; fail fast with an explicit message instead.
        raise RuntimeError(
            f"[Voice Changer] onnx export: unknown modelType {model_type!r}"
        )

    net_g_onnx.eval().to(dev)
    # strict=False: checkpoints may carry extra or missing keys relative to
    # the ONNX wrapper class — presumably training-only modules; TODO confirm.
    net_g_onnx.load_state_dict(cpt["weight"], strict=False)
    if is_half:
        net_g_onnx = net_g_onnx.half()

    # Dummy inputs for tracing: batch of 1, 2192 feature frames.
    if is_half:
        feats = torch.HalfTensor(1, 2192, metadata["embChannels"]).to(dev)
    else:
        feats = torch.FloatTensor(1, 2192, metadata["embChannels"]).to(dev)
    p_len = torch.LongTensor([2192]).to(dev)
    sid = torch.LongTensor([0]).to(dev)

    # NOTE(review): the metadata dict built by export2onnx in this file never
    # sets an "f0" key, so the original metadata["f0"] lookup always raised
    # KeyError. .get() treats a missing key as "no f0"; confirm the intended
    # default with the caller that builds the metadata.
    if metadata.get("f0") is True:
        pitch = torch.zeros(1, 2192, dtype=torch.int64).to(dev)
        pitchf = torch.FloatTensor(1, 2192).to(dev)
        input_names = ["feats", "p_len", "pitch", "pitchf", "sid"]
        inputs = (feats, p_len, pitch, pitchf, sid)
    else:
        input_names = ["feats", "p_len", "sid"]
        inputs = (feats, p_len, sid)

    output_names = ["audio"]
    torch.onnx.export(
        net_g_onnx,
        inputs,
        output_model,
        # Axis 1 (frame count) is dynamic so any input length works at runtime.
        dynamic_axes={
            "feats": [1],
            "pitch": [1],
            "pitchf": [1],
        },
        do_constant_folding=False,
        opset_version=17,
        verbose=False,
        input_names=input_names,
        output_names=output_names,
    )

    # Simplify the exported graph and embed the metadata as one JSON property.
    model_onnx2 = onnx.load(output_model)
    model_simp, check = simplify(model_onnx2)
    meta = model_simp.metadata_props.add()
    meta.key = "metadata"
    meta.value = json.dumps(metadata)
    onnx.save(model_simp, output_model_simple)