WIP: configure diffusion svc

This commit is contained in:
w-okada 2023-07-15 10:01:42 +09:00
parent 3ffacaed97
commit 01291dc4e3
6 changed files with 39 additions and 106 deletions

View File

@ -112,7 +112,9 @@ class DiffusionSVCModelSlot(ModelSlot):
sampleId: str = ""
defaultTune: int = 0
kstep: int = 100
defaultKstep: int = 20
defaultSpeedup: int = 10
kStepMax: int = 100
speakers: dict = field(default_factory=lambda: {1: "user"})
embedder: EmbedderType = "hubert_base"
samplingRate: int = 44100

View File

@ -42,7 +42,7 @@ class DiffusionSVC(VoiceChangerModel):
# Other settings
self.settings.tran = self.slotInfo.defaultTune
self.settings.dstId = self.slotInfo.dstId
self.settings.kstep = self.slotInfo.kstep
self.settings.kstep = self.slotInfo.defaultKstep
print("[Voice Changer] [DiffusionSVC] Initializing... done")
@ -190,18 +190,18 @@ class DiffusionSVC(VoiceChangerModel):
# "filename": output_file_simple,
# }
# def get_model_current(self):
# return [
# {
# "key": "defaultTune",
# "val": self.settings.tran,
# },
# {
# "key": "defaultIndexRatio",
# "val": self.settings.indexRatio,
# },
# {
# "key": "defaultProtect",
# "val": self.settings.protect,
# },
# ]
def get_model_current(self):
    """Report the live settings under the names of the slot defaults.

    Returns:
        A list of ``{"key": ..., "val": ...}`` dicts, in the order
        ``defaultTune``, ``defaultKstep``, ``defaultSpeedup``, whose values
        are read from ``self.settings`` (``tran``, ``kstep``, ``speedup``).
    """
    current = self.settings
    # (reported key, current value) pairs; order is preserved in the result.
    pairs = (
        ("defaultTune", current.tran),
        ("defaultKstep", current.kstep),
        ("defaultSpeedup", current.speedup),
    )
    return [{"key": name, "val": value} for name, value in pairs]

View File

@ -1,11 +1,11 @@
import os
from const import EnumInferenceTypes
from dataclasses import asdict
import torch
import onnxruntime
import json
from data.ModelSlot import DiffusionSVCModelSlot, ModelSlot, RVCModelSlot
from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.diffusion.unit2mel import load_model_vocoder_from_combo
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
@ -23,90 +23,16 @@ class DiffusionSVCModelSlotGenerator(ModelSlotGenerator):
slotInfo.iconFile = "/assets/icons/noimage.png"
slotInfo.embChannels = 768
# if slotInfo.isONNX:
# slotInfo = cls._setInfoByONNX(slotInfo)
# else:
# slotInfo = cls._setInfoByPytorch(slotInfo)
if slotInfo.isONNX:
slotInfo = cls._setInfoByONNX(slotInfo)
else:
slotInfo = cls._setInfoByPytorch(slotInfo)
return slotInfo
@classmethod
def _setInfoByPytorch(cls, slot: ModelSlot):
cpt = torch.load(slot.modelFile, map_location="cpu")
config_len = len(cpt["config"])
version = cpt.get("version", "v1")
slot = RVCModelSlot(**asdict(slot))
if version == "voras_beta":
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = EnumInferenceTypes.pyTorchVoRASbeta.value
slot.embChannels = 768
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
slot.useFinalProj = False
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
slot.embedder = slot.embedder[:-3]
# if slot.embedder == "hubert":
# slot.embedder = "hubert"
# elif slot.embedder == "contentvec":
# slot.embedder = "contentvec"
# elif slot.embedder == "hubert_jp":
# slot.embedder = "hubert_jp"
else:
raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
elif config_len == 18:
# Original RVC
slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1")
if version is None or version == "v1":
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
slot.embChannels = 256
slot.embOutputLayer = 9
slot.useFinalProj = True
slot.embedder = "hubert_base"
print("[Voice Changer] Official Model(pyTorch) : v1")
else:
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
slot.embChannels = 768
slot.embOutputLayer = 12
slot.useFinalProj = False
slot.embedder = "hubert_base"
print("[Voice Changer] Official Model(pyTorch) : v2")
else:
# DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
slot.embChannels = cpt["config"][17]
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Show the DDPN model information
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
else:
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
slot.embedder = slot.embedder[:-3]
if "speaker_info" in cpt.keys():
for k, v in cpt["speaker_info"].items():
slot.speakers[int(k)] = str(v)
slot.samplingRate = cpt["config"][-1]
del cpt
def _setInfoByPytorch(cls, slot: DiffusionSVCModelSlot):
    """Fill in ``slot.kStepMax`` from the combo model at ``slot.modelFile``.

    The combo file is loaded on the CPU and ``k_step_max`` is copied from the
    diffusion model arguments onto the slot. The same slot object is returned.
    """
    # The loader returns five items; only the diffusion args are used here,
    # the remaining four are discarded.
    _, diffusion_args, _, _, _ = load_model_vocoder_from_combo(slot.modelFile, device="cpu")
    slot.kStepMax = diffusion_args.model.k_step_max
    return slot
@classmethod

View File

@ -11,7 +11,8 @@ class DiffusionSVCSettings:
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 4
kstep: int = 100
kstep: int = 20
speedup: int = 10
silenceFront: int = 1 # 0:off, 1:on
modelSamplingRate: int = 44100

View File

@ -39,9 +39,13 @@ class PitchExtractorManager(Protocol):
elif pitchExtractorType == "crepe_full":
return CrepeOnnxPitchExtractor(pitchExtractorType, cls.params.crepe_onnx_full, gpu)
elif pitchExtractorType == "rmvpe":
print("pitchExtractorType", pitchExtractorType)
return RMVPEPitchExtractor(cls.params.rmvpe, gpu)
else:
# unknown pitch extractor type: fall back to dio
raise RuntimeError(
"[Voice Changer] PitchExctractor not found", pitchExtractorType
)
print("[Voice Changer] PitchExctractor not found", pitchExtractorType)
print(" fallback to dio")
return DioPitchExtractor()
# raise RuntimeError(
# "[Voice Changer] PitchExctractor not found", pitchExtractorType
# )

View File

@ -39,6 +39,6 @@ class PitchExtractorManager(Protocol):
return CrepeOnnxPitchExtractor(pitchExtractorType, cls.params.crepe_onnx_full, gpu)
else:
# unknown pitch extractor type: fall back to dio
raise RuntimeError(
"[Voice Changer] PitchExctractor not found", pitchExtractorType
)
print("[Voice Changer] PitchExctractor not found", pitchExtractorType)
print(" fallback to dio")
return DioPitchExtractor()