From 01291dc4e35c9fbe96ba2360b59b51c0e3394b74 Mon Sep 17 00:00:00 2001 From: w-okada Date: Sat, 15 Jul 2023 10:01:42 +0900 Subject: [PATCH] WIP: configure diffusion svc --- server/data/ModelSlot.py | 4 +- .../DiffusionSVC/DiffusionSVC.py | 32 +++---- .../DiffusionSVCModelSlotGenerator.py | 90 ++----------------- .../DiffusionSVC/DiffusionSVCSettings.py | 3 +- .../pitchExtractor/PitchExtractorManager.py | 10 ++- .../pitchExtractor/PitchExtractorManager.py | 6 +- 6 files changed, 39 insertions(+), 106 deletions(-) diff --git a/server/data/ModelSlot.py b/server/data/ModelSlot.py index 00a2df13..609f34ad 100644 --- a/server/data/ModelSlot.py +++ b/server/data/ModelSlot.py @@ -112,7 +112,9 @@ class DiffusionSVCModelSlot(ModelSlot): sampleId: str = "" defaultTune: int = 0 - kstep: int = 100 + defaultKstep: int = 20 + defaultSpeedup: int = 10 + kStepMax: int = 100 speakers: dict = field(default_factory=lambda: {1: "user"}) embedder: EmbedderType = "hubert_base" samplingRate: int = 44100 diff --git a/server/voice_changer/DiffusionSVC/DiffusionSVC.py b/server/voice_changer/DiffusionSVC/DiffusionSVC.py index 6dfbbe74..0ff32590 100644 --- a/server/voice_changer/DiffusionSVC/DiffusionSVC.py +++ b/server/voice_changer/DiffusionSVC/DiffusionSVC.py @@ -42,7 +42,7 @@ class DiffusionSVC(VoiceChangerModel): # その他の設定 self.settings.tran = self.slotInfo.defaultTune self.settings.dstId = self.slotInfo.dstId - self.settings.kstep = self.slotInfo.kstep + self.settings.kstep = self.slotInfo.defaultKstep print("[Voice Changer] [DiffusionSVC] Initializing... done") @@ -190,18 +190,18 @@ class DiffusionSVC(VoiceChangerModel): # "filename": output_file_simple, # } - # def get_model_current(self): - # return [ - # { - # "key": "defaultTune", - # "val": self.settings.tran, - # }, - # { - # "key": "defaultIndexRatio", - # "val": self.settings.indexRatio, - # }, - # { - # "key": "defaultProtect", - # "val": self.settings.protect, - # }, - # ] + def get_model_current(self): + return [ + { + "key": "defaultTune", + "val": self.settings.tran, + }, + { + "key": "defaultKstep", + "val": self.settings.kstep, + }, + { + "key": "defaultSpeedup", + "val": self.settings.speedup, + }, + ] diff --git a/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py b/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py index 3ff1aa39..3940846d 100644 --- a/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py +++ b/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py @@ -1,11 +1,11 @@ import os from const import EnumInferenceTypes from dataclasses import asdict -import torch import onnxruntime import json from data.ModelSlot import DiffusionSVCModelSlot, ModelSlot, RVCModelSlot +from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.diffusion.unit2mel import load_model_vocoder_from_combo from voice_changer.utils.LoadModelParams import LoadModelParams from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator @@ -23,90 +23,16 @@ class DiffusionSVCModelSlotGenerator(ModelSlotGenerator): slotInfo.iconFile = "/assets/icons/noimage.png" slotInfo.embChannels = 768 - # if slotInfo.isONNX: - # slotInfo = cls._setInfoByONNX(slotInfo) - # else: - # slotInfo = cls._setInfoByPytorch(slotInfo) + if slotInfo.isONNX: + slotInfo = cls._setInfoByONNX(slotInfo) + else: + slotInfo = cls._setInfoByPytorch(slotInfo) return slotInfo @classmethod - def _setInfoByPytorch(cls, slot: ModelSlot): - cpt = torch.load(slot.modelFile, map_location="cpu") - config_len = len(cpt["config"]) - version = cpt.get("version", "v1") - - slot = RVCModelSlot(**asdict(slot)) - - if version == "voras_beta": - slot.f0 = True if cpt["f0"] == 1 else False - slot.modelType = EnumInferenceTypes.pyTorchVoRASbeta.value - slot.embChannels = 768 - slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9 - slot.useFinalProj = False - - slot.embedder = cpt["embedder_name"] - if slot.embedder.endswith("768"): - slot.embedder = slot.embedder[:-3] - - # if slot.embedder == "hubert": - # slot.embedder = "hubert" - # elif slot.embedder == "contentvec": - # slot.embedder = "contentvec" - # elif slot.embedder == "hubert_jp": - # slot.embedder = "hubert_jp" - else: - raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") - - elif config_len == 18: - # Original RVC - slot.f0 = True if cpt["f0"] == 1 else False - version = cpt.get("version", "v1") - if version is None or version == "v1": - slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value - slot.embChannels = 256 - slot.embOutputLayer = 9 - slot.useFinalProj = True - slot.embedder = "hubert_base" - print("[Voice Changer] Official Model(pyTorch) : v1") - else: - slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value - slot.embChannels = 768 - slot.embOutputLayer = 12 - slot.useFinalProj = False - slot.embedder = "hubert_base" - print("[Voice Changer] Official Model(pyTorch) : v2") - - else: - # DDPN RVC - slot.f0 = True if cpt["f0"] == 1 else False - slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value - slot.embChannels = cpt["config"][17] - slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9 - if slot.embChannels == 256: - slot.useFinalProj = True - else: - slot.useFinalProj = False - - # DDPNモデルの情報を表示 - if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True: - print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like") - elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False: - print("[Voice Changer] DDPN Model(pyTorch): Official v2 like") - else: - print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}") - - slot.embedder = cpt["embedder_name"] - if slot.embedder.endswith("768"): - slot.embedder = slot.embedder[:-3] - - if "speaker_info" in cpt.keys(): - for k, v in cpt["speaker_info"].items(): - slot.speakers[int(k)] = str(v) - - slot.samplingRate = cpt["config"][-1] - - del cpt - + def _setInfoByPytorch(cls, slot: DiffusionSVCModelSlot): + diff_model, diff_args, naive_model, naive_args, vocoder = load_model_vocoder_from_combo(slot.modelFile, device="cpu") + slot.kStepMax = diff_args.model.k_step_max return slot @classmethod diff --git a/server/voice_changer/DiffusionSVC/DiffusionSVCSettings.py b/server/voice_changer/DiffusionSVC/DiffusionSVCSettings.py index 0a12af85..bc413b25 100644 --- a/server/voice_changer/DiffusionSVC/DiffusionSVCSettings.py +++ b/server/voice_changer/DiffusionSVC/DiffusionSVCSettings.py @@ -11,7 +11,8 @@ class DiffusionSVCSettings: silentThreshold: float = 0.00001 extraConvertSize: int = 1024 * 4 - kstep: int = 100 + kstep: int = 20 + speedup: int = 10 silenceFront: int = 1 # 0:off, 1:on modelSamplingRate: int = 44100 diff --git a/server/voice_changer/DiffusionSVC/pitchExtractor/PitchExtractorManager.py b/server/voice_changer/DiffusionSVC/pitchExtractor/PitchExtractorManager.py index 42d29142..561ad2e5 100644 --- a/server/voice_changer/DiffusionSVC/pitchExtractor/PitchExtractorManager.py +++ b/server/voice_changer/DiffusionSVC/pitchExtractor/PitchExtractorManager.py @@ -39,9 +39,13 @@ class PitchExtractorManager(Protocol): elif pitchExtractorType == "crepe_full": return CrepeOnnxPitchExtractor(pitchExtractorType, cls.params.crepe_onnx_full, gpu) elif pitchExtractorType == "rmvpe": + print("pitchExtractorType", pitchExtractorType) return RMVPEPitchExtractor(cls.params.rmvpe, gpu) else: # return hubert as default - raise RuntimeError( - "[Voice Changer] PitchExctractor not found", pitchExtractorType - ) + print("[Voice Changer] PitchExctractor not found", pitchExtractorType) + print(" fallback to dio") + return DioPitchExtractor() + # raise RuntimeError( + # "[Voice Changer] PitchExctractor not found", pitchExtractorType + # ) diff --git a/server/voice_changer/RVC/pitchExtractor/PitchExtractorManager.py b/server/voice_changer/RVC/pitchExtractor/PitchExtractorManager.py index 5a82a767..c37a99b9 100644 --- a/server/voice_changer/RVC/pitchExtractor/PitchExtractorManager.py +++ b/server/voice_changer/RVC/pitchExtractor/PitchExtractorManager.py @@ -39,6 +39,6 @@ class PitchExtractorManager(Protocol): return CrepeOnnxPitchExtractor(pitchExtractorType, cls.params.crepe_onnx_full, gpu) else: # return hubert as default - raise RuntimeError( - "[Voice Changer] PitchExctractor not found", pitchExtractorType - ) + print("[Voice Changer] PitchExctractor not found", pitchExtractorType) + print(" fallback to dio") + return DioPitchExtractor()