voice-changer/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py

57 lines
2.2 KiB
Python
Raw Normal View History

2023-07-12 18:59:48 +03:00
import os
2023-08-09 10:55:59 +03:00
from data.ModelSlot import DiffusionSVCModelSlot, ModelSlot
2023-07-15 04:01:42 +03:00
from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.diffusion.unit2mel import load_model_vocoder_from_combo
2023-08-05 07:24:11 +03:00
from voice_changer.VoiceChangerParamsManager import VoiceChangerParamsManager
2023-07-12 18:59:48 +03:00
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
2023-08-05 07:24:11 +03:00
2023-07-21 12:25:28 +03:00
def get_divisors(n):
    """Return every positive divisor of ``n`` in ascending order.

    Divisors are collected in pairs ``(i, n // i)`` by trial division up to
    the square root of ``n``; when both members of a pair are equal (``n`` is
    a perfect square) the value is listed only once.

    Args:
        n: The number whose divisors are wanted.

    Returns:
        A sorted list of the positive divisors of ``n``. The list is empty
        when ``n`` is less than 1.
    """
    # A negative ``n`` would make ``n ** 0.5`` complex and crash inside
    # ``int()``; values below 1 have no divisors to report (0 already
    # produced an empty list through the empty range).
    if n < 1:
        return []

    divisors = []
    for i in range(1, int(n**0.5) + 1):
        if n % i == 0:
            divisors.append(i)
            partner = n // i
            # Skip the partner when it is the same value as ``i``.
            if partner != i:
                divisors.append(partner)
    return sorted(divisors)
2023-07-12 18:59:48 +03:00
class DiffusionSVCModelSlotGenerator(ModelSlotGenerator):
    """Populate a ``DiffusionSVCModelSlot`` from a set of uploaded files."""

    @classmethod
    def loadModel(cls, props: LoadModelParams):
        """Create and fill the slot description for a Diffusion-SVC model.

        Args:
            props: Load request; ``props.files`` is scanned for an entry whose
                ``kind`` is ``"diffusionSVCModel"`` and ``props.slot`` is
                stored as the slot index.

        Returns:
            The populated slot, after format-specific details were added by
            ``_setInfoByONNX`` or ``_setInfoByPytorch``.
        """
        slotInfo: DiffusionSVCModelSlot = DiffusionSVCModelSlot()
        for file in props.files:
            if file.kind == "diffusionSVCModel":
                slotInfo.modelFile = file.name

        slotInfo.defaultTune = 0
        # The file extension alone decides which loader path is taken below.
        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
        # Slot name is the model file name without directory or extension.
        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
        # slotInfo.iconFile = "/assets/icons/noimage.png"
        slotInfo.embChannels = 768
        slotInfo.slotIndex = props.slot

        if slotInfo.isONNX:
            slotInfo = cls._setInfoByONNX(slotInfo)
        else:
            slotInfo = cls._setInfoByPytorch(slotInfo)
        return slotInfo

    @classmethod
    def _setInfoByPytorch(cls, slot: DiffusionSVCModelSlot):
        """Fill ``slot`` with settings read from the PyTorch combo model.

        The model is loaded on the CPU from
        ``<model_dir>/<slotIndex>/<basename of modelFile>`` and its diffusion
        and naive model arguments are copied into the slot.

        Args:
            slot: Slot whose ``slotIndex`` and ``modelFile`` are already set.

        Returns:
            The same ``slot`` object, updated in place.
        """
        vcparams = VoiceChangerParamsManager.get_instance().params
        modelPath = os.path.join(
            vcparams.model_dir,
            str(slot.slotIndex),
            os.path.basename(slot.modelFile),
        )

        # Only the argument objects are needed here; the loaded models
        # themselves are discarded.
        _, diff_args, _, naive_args = load_model_vocoder_from_combo(modelPath, device="cpu")

        slot.kStepMax = diff_args.model.k_step_max
        slot.nLayers = diff_args.model.n_layers
        slot.nnLayers = naive_args.model.n_layers
        slot.defaultKstep = slot.kStepMax

        # Default speedup is the largest proper divisor of the default k-step.
        # With fewer than two divisors (e.g. a k-step of 1) there is no
        # second-largest entry, so fall back to 1 instead of raising
        # IndexError.
        divs = get_divisors(slot.defaultKstep)
        slot.defaultSpeedup = divs[-2] if len(divs) >= 2 else 1

        # Speaker ids start at 1 and are labelled "user<id>".
        slot.speakers = {
            spk_id: f"user{spk_id}"
            for spk_id in range(1, diff_args.model.n_spk + 1)
        }
        return slot

    @classmethod
    def _setInfoByONNX(cls, slot: ModelSlot):
        """Return ``slot`` unchanged; no ONNX-specific fields are set here."""
        return slot