voice-changer/server/voice_changer/RVC/ModelSlotGenerator.py

213 lines
7.2 KiB
Python
Raw Normal View History

2023-05-02 14:57:12 +03:00
from const import EnumEmbedderTypes, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
import torch
import onnxruntime
import json
2023-05-14 22:24:58 +03:00
import os
2023-05-02 14:57:12 +03:00
2023-05-14 22:24:58 +03:00
def generateModelSlot(slotDir: str):
    """Create a ModelSlot describing the model stored in *slotDir*.

    The slot directory is expected to contain a ``params.json`` whose
    ``files`` mapping names the model file (``rvcModel``, required) and,
    optionally, a feature file (``rvcFeature``) and an index file
    (``rvcIndex``). Only the base name of each entry is used; it is
    re-rooted under *slotDir*.

    Args:
        slotDir: Path of the slot directory.

    Returns:
        A ModelSlot. If *slotDir* does not exist, a freshly constructed
        ModelSlot is returned untouched. Otherwise the slot carries the file
        paths and user-facing settings from ``params.json`` plus the model
        details filled in by ``_setInfoByONNX`` or ``_setInfoByPytorch``.

    Raises:
        KeyError: If ``params.json`` lacks ``files`` or ``files.rvcModel``.
        OSError: If ``params.json`` cannot be opened.
    """
    slot = ModelSlot()
    if not os.path.exists(slotDir):
        return slot

    with open(os.path.join(slotDir, "params.json"), "r") as fp:
        params = json.load(fp)
    files = params["files"]

    def _inSlot(recordedPath):
        # Keep only the file name; the file itself lives in the slot dir.
        return os.path.join(slotDir, os.path.basename(recordedPath))

    # The model file is mandatory, the other two are optional.
    slot.modelFile = _inSlot(files["rvcModel"])
    slot.featureFile = _inSlot(files["rvcFeature"]) if "rvcFeature" in files else None
    slot.indexFile = _inSlot(files["rvcIndex"]) if "rvcIndex" in files else None

    # Numeric settings default to 0 when absent.
    slot.defaultTune = params.get("defaultTune", 0)
    slot.defaultIndexRatio = params.get("defaultIndexRatio", 0)

    # Descriptive settings default to None when absent.
    slot.name = params.get("name")
    slot.description = params.get("description")
    slot.credit = params.get("credit")
    slot.termsOfUseUrl = params.get("termsOfUseUrl")

    # The file extension decides which loader inspects the model.
    slot.isONNX = slot.modelFile.endswith(".onnx")
    inspect = _setInfoByONNX if slot.isONNX else _setInfoByPytorch
    inspect(slot)
    return slot
2023-05-08 19:01:20 +03:00
def _setInfoByPytorch(slot: ModelSlot):
    """Fill *slot* with model details read from a PyTorch checkpoint.

    The checkpoint at ``slot.modelFile`` is loaded on the CPU. A ``config``
    entry of length 18 is treated as an original RVC model (v1 or v2,
    selected by the optional ``version`` entry); any other length is treated
    as a DDPN model, whose embedder settings are read from the checkpoint.

    Sets ``f0``, ``modelType``, ``embChannels``, ``embOutputLayer``,
    ``useFinalProj``, ``embedder`` and ``samplingRate`` on *slot*.

    Args:
        slot: Slot to update in place; ``slot.modelFile`` must already be set.

    Raises:
        RuntimeError: If a DDPN checkpoint names an embedder that is not
            hubert, contentvec or hubert_jp.
    """
    # NOTE(review): torch.load unpickles the file, so a checkpoint from an
    # untrusted source can execute arbitrary code. Left unchanged here.
    cpt = torch.load(slot.modelFile, map_location="cpu")
    config_len = len(cpt["config"])

    if config_len == 18:
        # Original RVC
        slot.f0 = bool(cpt["f0"] == 1)
        version = cpt.get("version", "v1")
        if version is None or version == "v1":
            slot.modelType = (
                EnumInferenceTypes.pyTorchRVC
                if slot.f0
                else EnumInferenceTypes.pyTorchRVCNono
            )
            slot.embChannels = 256
            slot.embOutputLayer = 9
            slot.useFinalProj = True
            slot.embedder = EnumEmbedderTypes.hubert
        else:
            slot.modelType = (
                EnumInferenceTypes.pyTorchRVCv2
                if slot.f0
                else EnumInferenceTypes.pyTorchRVCv2Nono
            )
            slot.embChannels = 768
            slot.embOutputLayer = 12
            slot.useFinalProj = False
            slot.embedder = EnumEmbedderTypes.hubert
    else:
        # DDPN RVC
        slot.f0 = bool(cpt["f0"] == 1)
        slot.modelType = (
            EnumInferenceTypes.pyTorchWebUI
            if slot.f0
            else EnumInferenceTypes.pyTorchWebUINono
        )
        slot.embChannels = cpt["config"][17]
        slot.embOutputLayer = (
            cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
        )
        # The final projection is used only with 256-channel embeddings.
        slot.useFinalProj = slot.embChannels == 256

        # Show the DDPN model information.
        if (
            slot.embChannels == 256
            and slot.embOutputLayer == 9
            and slot.useFinalProj is True
        ):
            print("[Voice Changer] DDPN Model: Original v1 like")
        elif (
            slot.embChannels == 768
            and slot.embOutputLayer == 12
            and slot.useFinalProj is False
        ):
            print("[Voice Changer] DDPN Model: Original v2 like")
        else:
            print(
                f"[Voice Changer] DDPN Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
            )

        slot.embedder = cpt["embedder_name"]
        # A trailing "768" on the embedder name is dropped before matching.
        if slot.embedder.endswith("768"):
            slot.embedder = slot.embedder[:-3]

        if slot.embedder == EnumEmbedderTypes.hubert.value:
            slot.embedder = EnumEmbedderTypes.hubert
        elif slot.embedder == EnumEmbedderTypes.contentvec.value:
            slot.embedder = EnumEmbedderTypes.contentvec
        elif slot.embedder == EnumEmbedderTypes.hubert_jp.value:
            slot.embedder = EnumEmbedderTypes.hubert_jp
        else:
            # The message used to name setInfoByONNX (copy-paste slip);
            # it now points at the function that actually raises.
            raise RuntimeError("[Voice Changer][setInfoByPytorch] unknown embedder")

    # The sampling rate is the last element of the config list.
    slot.samplingRate = cpt["config"][-1]

    del cpt
2023-05-08 19:01:20 +03:00
def _setInfoByONNX(slot: ModelSlot):
    """Fill *slot* with model details read from an ONNX file's metadata.

    A temporary CPU inference session is opened on ``slot.modelFile`` and the
    JSON stored under the ``metadata`` key of its custom metadata map is
    parsed. On success the slot receives ``embChannels``, ``embOutputLayer``,
    ``useFinalProj``, ``embedder``, ``f0``, ``modelType`` and ``samplingRate``
    and is marked ``deprecated = False``.

    If anything in that step raises (missing metadata, missing keys, an
    unknown embedder name, ...), the slot is instead filled with fixed v1
    values, marked ``deprecated = True``, and a caution is printed.

    Args:
        slot: Slot to update in place; ``slot.modelFile`` must already be set.
    """
    tmp_onnx_session = onnxruntime.InferenceSession(
        slot.modelFile, providers=["CPUExecutionProvider"]
    )
    modelmeta = tmp_onnx_session.get_modelmeta()
    try:
        metadata = json.loads(modelmeta.custom_metadata_map["metadata"])

        # slot.modelType = metadata["modelType"]
        slot.embChannels = metadata["embChannels"]

        slot.embOutputLayer = (
            metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
        )
        # Honour an explicit useFinalProj from the metadata; derive it from
        # the channel count only when the key is absent. Previously the
        # metadata value was read and then unconditionally overwritten by the
        # channel-derived value, so it never took effect.
        if "useFinalProj" in metadata:
            slot.useFinalProj = metadata["useFinalProj"]
        else:
            slot.useFinalProj = slot.embChannels == 256

        # Show the ONNX model information.
        if (
            slot.embChannels == 256
            and slot.embOutputLayer == 9
            and slot.useFinalProj is True
        ):
            print("[Voice Changer] ONNX Model: Original v1 like")
        elif (
            slot.embChannels == 768
            and slot.embOutputLayer == 12
            and slot.useFinalProj is False
        ):
            print("[Voice Changer] ONNX Model: Original v2 like")
        else:
            print(
                f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
            )

        # A missing embedder entry means hubert.
        if "embedder" not in metadata:
            slot.embedder = EnumEmbedderTypes.hubert
        elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
            slot.embedder = EnumEmbedderTypes.hubert
        elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value:
            slot.embedder = EnumEmbedderTypes.contentvec
        elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value:
            slot.embedder = EnumEmbedderTypes.hubert_jp
        else:
            # NOTE(review): this is raised inside the try, so the handler
            # below catches it and falls back to the deprecated defaults.
            raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")

        slot.f0 = metadata["f0"]
        slot.modelType = (
            EnumInferenceTypes.onnxRVC if slot.f0 else EnumInferenceTypes.onnxRVCNono
        )
        slot.samplingRate = metadata["samplingRate"]
        slot.deprecated = False

    except Exception as e:
        # Best-effort fallback: treat the file as an old v1 export.
        slot.modelType = EnumInferenceTypes.onnxRVC
        slot.embChannels = 256
        # Reset these too: the try body may have failed after assigning
        # them, which would leave e.g. layer 12 / no final projection paired
        # with the 256-channel fallback.
        slot.embOutputLayer = 9
        slot.useFinalProj = True
        slot.embedder = EnumEmbedderTypes.hubert
        slot.f0 = True
        slot.samplingRate = 48000
        slot.deprecated = True

        print("[Voice Changer] setInfoByONNX", e)
        print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
        print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
        print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")

    del tmp_onnx_session