From 8121c3a8494785b5fe8aa1be4e00bc91633a035c Mon Sep 17 00:00:00 2001 From: wataru Date: Fri, 28 Apr 2023 07:36:08 +0900 Subject: [PATCH] WIP: refactoring --- server/voice_changer/RVC/ModelSlot.py | 17 ++ server/voice_changer/RVC/RVC.py | 191 +++++------------- server/voice_changer/RVC/RVCSettings.py | 44 ++++ server/voice_changer/utils/LoadModelParams.py | 12 +- 4 files changed, 120 insertions(+), 144 deletions(-) create mode 100644 server/voice_changer/RVC/ModelSlot.py create mode 100644 server/voice_changer/RVC/RVCSettings.py diff --git a/server/voice_changer/RVC/ModelSlot.py b/server/voice_changer/RVC/ModelSlot.py new file mode 100644 index 00000000..14619910 --- /dev/null +++ b/server/voice_changer/RVC/ModelSlot.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass +from voice_changer.RVC.const import RVC_MODEL_TYPE_RVC + + +@dataclass +class ModelSlot: + pyTorchModelFile: str = "" + onnxModelFile: str = "" + featureFile: str = "" + indexFile: str = "" + defaultTrans: int = 0 + modelType: int = RVC_MODEL_TYPE_RVC + samplingRate: int = -1 + f0: bool = True + embChannels: int = 256 + deprecated: bool = False + embedder: str = "hubert_base" # "hubert_base", "contentvec", "distilhubert" diff --git a/server/voice_changer/RVC/RVC.py b/server/voice_changer/RVC/RVC.py index 4ae4d828..89bdfd3a 100644 --- a/server/voice_changer/RVC/RVC.py +++ b/server/voice_changer/RVC/RVC.py @@ -4,10 +4,11 @@ import json import resampy from voice_changer.RVC.ModelWrapper import ModelWrapper from Exceptions import NoModeLoadedException +from voice_changer.RVC.RVCSettings import RVCSettings from voice_changer.utils.LoadModelParams import LoadModelParams from voice_changer.utils.VoiceChangerParams import VoiceChangerParams -from dataclasses import dataclass, asdict, field +from dataclasses import asdict import numpy as np import torch @@ -44,62 +45,6 @@ providers = [ ] -@dataclass -class ModelSlot: - pyTorchModelFile: str = "" - onnxModelFile: str = "" - featureFile: str = "" - indexFile: str = "" - defaultTrans: int = 0 - modelType: int = RVC_MODEL_TYPE_RVC - samplingRate: int = -1 - f0: bool = True - embChannels: int = 256 - deprecated: bool = False - embedder: str = "hubert_base" # "hubert_base", "contentvec", "distilhubert" - - -@dataclass -class RVCSettings: - gpu: int = 0 - dstId: int = 0 - - f0Detector: str = "pm" # pm or harvest - tran: int = 20 - silentThreshold: float = 0.00001 - extraConvertSize: int = 1024 * 32 - clusterInferRatio: float = 0.1 - - framework: str = "PyTorch" # PyTorch or ONNX - pyTorchModelFile: str = "" - onnxModelFile: str = "" - configFile: str = "" - modelSlots: list[ModelSlot] = field( - default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot()] - ) - indexRatio: float = 0 - rvcQuality: int = 0 - silenceFront: int = 1 # 0:off, 1:on - modelSamplingRate: int = 48000 - modelSlotIndex: int = -1 - - speakers: dict[str, int] = field(default_factory=lambda: {}) - - # ↓mutableな物だけ列挙 - intData = [ - "gpu", - "dstId", - "tran", - "extraConvertSize", - "rvcQuality", - "modelSamplingRate", - "silenceFront", - "modelSlotIndex", - ] - floatData = ["silentThreshold", "indexRatio"] - strData = ["framework", "f0Detector"] - - class RVC: def __init__(self, params: VoiceChangerParams): self.initialLoad = True @@ -123,41 +68,44 @@ class RVC: print("mps: ", self.mps_enabled) def loadModel(self, props: LoadModelParams): + """ + loadModelはスロットへのエントリ(推論向けにはロードしない)。 + 例外的に、まだ一つも推論向けにロードされていない場合は、ロードする。 + """ self.is_half = props.isHalf tmp_slot = props.slot params_str = props.params params = json.loads(params_str) - newSlot = asdict(self.settings.modelSlots[tmp_slot]) - newSlot.update( - { - "pyTorchModelFile": props.files.pyTorchModelFilename, - "onnxModelFile": props.files.onnxModelFilename, - "featureFile": props.files.featureFilename, - "indexFile": props.files.indexFilename, - "defaultTrans": params["trans"], - } + self.settings.modelSlots[ + tmp_slot + ].pyTorchModelFile = props.files.pyTorchModelFilename + self.settings.modelSlots[tmp_slot].onnxModelFile = props.files.onnxModelFilename + self.settings.modelSlots[tmp_slot].featureFile = props.files.featureFilename + self.settings.modelSlots[tmp_slot].indexFile = props.files.indexFilename + self.settings.modelSlots[tmp_slot].defaultTrans = params["trans"] + + isONNX = ( + True + if self.settings.modelSlots[tmp_slot].onnxModelFile is not None + else False ) - self.settings.modelSlots[tmp_slot] = ModelSlot(**newSlot) - print("[Voice Changer] RVC loading... slot:", tmp_slot) - - # Load metadata - if ( - self.settings.modelSlots[tmp_slot].pyTorchModelFile is not None - and self.settings.modelSlots[tmp_slot].pyTorchModelFile != "" - ): - self._setInfoByPytorch( - tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile - ) - if ( - self.settings.modelSlots[tmp_slot].onnxModelFile is not None - and self.settings.modelSlots[tmp_slot].onnxModelFile != "" - ): + # メタデータ設定 + if isONNX: self._setInfoByONNX( tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile ) + else: + self._setInfoByPytorch( + tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile + ) + print( + f"[Voice Changer] RVC loading... slot:{tmp_slot}", + asdict(self.settings.modelSlots[tmp_slot]), + ) + # hubertロード try: hubert_path = self.params.hubert_base models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( @@ -173,6 +121,7 @@ class RVC: except Exception as e: print("EXCEPTION during loading hubert/contentvec model", e) + # 初回のみロード if self.initialLoad or tmp_slot == self.currentSlot: self.prepareModel(tmp_slot) self.settings.modelSlotIndex = tmp_slot @@ -197,7 +146,6 @@ class RVC: self.settings.modelSlots[slot].embedder = self.settings.modelSlots[ slot ].embedder[:-3] - print("embedder....", self.settings.modelSlots[slot].embedder) self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False self.settings.modelSlots[slot].samplingRate = cpt["config"][-1] @@ -208,84 +156,55 @@ class RVC: tmp_onnx_session = ModelWrapper(file) self.settings.modelSlots[slot].modelType = tmp_onnx_session.getModelType() self.settings.modelSlots[slot].embChannels = tmp_onnx_session.getEmbChannels() + self.settings.modelSlots[slot].embedder = tmp_onnx_session.getEmbedder() self.settings.modelSlots[slot].f0 = tmp_onnx_session.getF0() self.settings.modelSlots[slot].samplingRate = tmp_onnx_session.getSamplingRate() self.settings.modelSlots[slot].deprecated = tmp_onnx_session.getDeprecated() - self.settings.modelSlots[slot].embedder = tmp_onnx_session.getEmbedder() - print("embedder....", self.settings.modelSlots[slot].embedder) def prepareModel(self, slot: int): print("[Voice Changer] Prepare Model of slot:", slot) - pyTorchModelFile = self.settings.modelSlots[slot].pyTorchModelFile onnxModelFile = self.settings.modelSlots[slot].onnxModelFile - # PyTorchモデル生成 - if pyTorchModelFile != None and pyTorchModelFile != "": + isONNX = ( + True if self.settings.modelSlots[slot].onnxModelFile is not None else False + ) + + if isONNX: + print("[Voice Changer] Loading ONNX Model...") + self.next_onnx_session = ModelWrapper(onnxModelFile) + self.next_net_g = None + else: print("[Voice Changer] Loading Pytorch Model...") - cpt = torch.load(pyTorchModelFile, map_location="cpu") - """ - (1) オリジナルとrvc-webuiのモデル判定 ⇒ config全体の形状 - ■ ノーマル256 - [1025, 32, 192, 192, 768, 2, 6, 3, 0, '1', [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 6, 2, 2, 2], 512, [16, 16, 4, 4, 4], 109, 256, 48000] - ■ ノーマル 768対応 - [1025, 32, 192, 192, 768, 2, 6, 3, 0, '1', [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 6, 2, 2, 2], 512, [16, 16, 4, 4, 4], 109, 256, 768, 48000] - ⇒ 18: オリジナル, 19: rvc-webui - - (2-1) オリジナルのノーマルorPitchレス判定 ⇒ ckp["f0"]で判定 - 0: ピッチレス, 1:ノーマル - - (2-2) rvc-webuiの、(256 or 768) x (ノーマルor pitchレス)判定 ⇒ 256, or 768 は17番目の要素で判定。, ノーマルor pitchレスはckp["f0"]で判定 - """ + torchModelSlot = self.settings.modelSlots[slot] + cpt = torch.load(torchModelSlot.pyTorchModelFile, map_location="cpu") if ( - self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC - and self.settings.modelSlots[slot].f0 is True + torchModelSlot.modelType == RVC_MODEL_TYPE_RVC + and torchModelSlot.f0 is True ): net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half) elif ( - self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC - and self.settings.modelSlots[slot].f0 is False + torchModelSlot.modelType == RVC_MODEL_TYPE_RVC + and torchModelSlot.f0 is False ): net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) elif ( - self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI - and self.settings.modelSlots[slot].f0 is True + torchModelSlot.modelType == RVC_MODEL_TYPE_WEBUI + and torchModelSlot.f0 is True ): net_g = SynthesizerTrnMsNSFsid_webui( **cpt["params"], is_half=self.is_half ) - elif ( - self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI - and self.settings.modelSlots[slot].f0 is False - ): + else: net_g = SynthesizerTrnMsNSFsidNono_webui( **cpt["params"], is_half=self.is_half ) - else: - print("unknwon") - net_g.eval() net_g.load_state_dict(cpt["weight"], strict=False) + if self.is_half: net_g = net_g.half() + self.next_net_g = net_g - else: - print("[Voice Changer] Skip Loading Pytorch Model...") - self.next_net_g = None - - # ONNXモデル生成 - if onnxModelFile != None and onnxModelFile != "": - print("[Voice Changer] Loading ONNX Model...") - self.next_onnx_session = ModelWrapper(onnxModelFile) - # self.settings.modelSlots[slot].samplingRateOnnx = self.next_onnx_session.getSamplingRate() - # self.settings.modelSlots[slot].f0Onnx = self.next_onnx_session.getF0() - # self.settings.modelSlots[slot].embChannelsOnnx = self.next_onnx_session.getEmbChannels() - - # # ONNXがある場合は、ONNXの設定を優先 - # self.settings.modelSlots[slot].samplingRate = self.settings.modelSlots[slot].samplingRateOnnx - # self.settings.modelSlots[slot].f0 = self.settings.modelSlots[slot].f0Onnx - # self.settings.modelSlots[slot].embChannels = self.settings.modelSlots[slot].embChannelsOnnx - else: - print("[Voice Changer] Skip Loading ONNX Model...") self.next_onnx_session = None self.next_feature_file = self.settings.modelSlots[slot].featureFile @@ -295,15 +214,11 @@ class RVC: self.next_framework = ( "ONNX" if self.next_onnx_session is not None else "PyTorch" ) - print( - "[Voice Changer] Prepare done.", - ) + print("[Voice Changer] Prepare done.") return self.get_info() def switchModel(self): - print( - "[Voice Changer] Switching model..", - ) + print("[Voice Changer] Switching model..") # del self.net_g # del self.onnx_session self.net_g = self.next_net_g diff --git a/server/voice_changer/RVC/RVCSettings.py b/server/voice_changer/RVC/RVCSettings.py new file mode 100644 index 00000000..f645c448 --- /dev/null +++ b/server/voice_changer/RVC/RVCSettings.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass, field + +from voice_changer.RVC.ModelSlot import ModelSlot + + +@dataclass +class RVCSettings: + gpu: int = 0 + dstId: int = 0 + + f0Detector: str = "pm" # pm or harvest + tran: int = 20 + silentThreshold: float = 0.00001 + extraConvertSize: int = 1024 * 32 + clusterInferRatio: float = 0.1 + + framework: str = "PyTorch" # PyTorch or ONNX + pyTorchModelFile: str = "" + onnxModelFile: str = "" + configFile: str = "" + modelSlots: list[ModelSlot] = field( + default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot()] + ) + indexRatio: float = 0 + rvcQuality: int = 0 + silenceFront: int = 1 # 0:off, 1:on + modelSamplingRate: int = 48000 + modelSlotIndex: int = -1 + + speakers: dict[str, int] = field(default_factory=lambda: {}) + + # ↓mutableな物だけ列挙 + intData = [ + "gpu", + "dstId", + "tran", + "extraConvertSize", + "rvcQuality", + "modelSamplingRate", + "silenceFront", + "modelSlotIndex", + ] + floatData = ["silentThreshold", "indexRatio"] + strData = ["framework", "f0Detector"] diff --git a/server/voice_changer/utils/LoadModelParams.py b/server/voice_changer/utils/LoadModelParams.py index dd99f8ec..e68698bc 100644 --- a/server/voice_changer/utils/LoadModelParams.py +++ b/server/voice_changer/utils/LoadModelParams.py @@ -3,12 +3,12 @@ from dataclasses import dataclass @dataclass class FilePaths: - configFilename: str - pyTorchModelFilename: str - onnxModelFilename: str - clusterTorchModelFilename: str - featureFilename: str - indexFilename: str + configFilename: str | None + pyTorchModelFilename: str | None + onnxModelFilename: str | None + clusterTorchModelFilename: str | None + featureFilename: str | None + indexFilename: str | None @dataclass