diff --git a/server/restapi/MMVC_Rest_Fileuploader.py b/server/restapi/MMVC_Rest_Fileuploader.py index b81f17cf..41c3055c 100644 --- a/server/restapi/MMVC_Rest_Fileuploader.py +++ b/server/restapi/MMVC_Rest_Fileuploader.py @@ -63,7 +63,7 @@ class MMVC_Rest_Fileuploader: def post_update_settings( self, key: str = Form(...), val: Union[int, str, float] = Form(...) ): - print("post_update_settings", key, val) + print("[Voice Changer] update configuration:", key, val) info = self.voiceChangerManager.update_settings(key, val) json_compatible_item_data = jsonable_encoder(info) return JSONResponse(content=json_compatible_item_data) diff --git a/server/voice_changer/RVC/ModelSlotGenerator.py b/server/voice_changer/RVC/ModelSlotGenerator.py index 58563bc5..9d36a5c6 100644 --- a/server/voice_changer/RVC/ModelSlotGenerator.py +++ b/server/voice_changer/RVC/ModelSlotGenerator.py @@ -70,7 +70,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str): try: metadata = json.loads(modelmeta.custom_metadata_map["metadata"]) - slot.modelType = metadata["modelType"] + # slot.modelType = metadata["modelType"] slot.embChannels = metadata["embChannels"] if "embedder" not in metadata: diff --git a/server/voice_changer/RVC/RVC.py b/server/voice_changer/RVC/RVC.py index b0c13c41..ab190ca6 100644 --- a/server/voice_changer/RVC/RVC.py +++ b/server/voice_changer/RVC/RVC.py @@ -1,5 +1,6 @@ import sys import os +from Exceptions import NoModeLoadedException from voice_changer.RVC.ModelSlot import ModelSlot from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager @@ -42,7 +43,7 @@ import torch import traceback import faiss -from const import TMP_DIR, UPLOAD_DIR, EnumEmbedderTypes +from const import UPLOAD_DIR, EnumEmbedderTypes from voice_changer.RVC.custom_vc_infer_pipeline import VC @@ -89,9 +90,6 @@ class RVC: f"[Voice Changer] RVC new model is uploaded,{target_slot_idx}", asdict(modelSlot), ) - """ - [Voice Changer] RVC new model is uploaded,0 {'pyTorchModelFile': 'upload_dir/0/kurage.pth', 'onnxModelFile': None, 'featureFile': None, 'indexFile': None, 'defaultTrans': 16, 'isONNX': False, 'modelType': , 'samplingRate': 48000, 'f0': True, 'embChannels': 768, 'deprecated': False, 'embedder': 'hubert-base-japanese'} - """ # 初回のみロード if self.initialLoad: @@ -139,7 +137,6 @@ class RVC: # Embedder 生成 try: - print("AFASFDAFDAFDASDFASDFSADFASDFA", half, self.settings.gpu) embedder = EmbedderManager.getEmbedder( modelSlot.embedder, emmbedderFilename, @@ -179,8 +176,14 @@ class RVC: def prepareModel(self, slot: int): if slot < 0: return self.get_info() - print("[Voice Changer] Prepare Model of slot:", slot) modelSlot = self.settings.modelSlots[slot] + inferencerFilename = ( + modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile + ) + if inferencerFilename == "": + return self.get_info() + + print("[Voice Changer] Prepare Model of slot:", slot) # Inferencer, embedderのロード inferencer, embedder = self.createPipeline(modelSlot) @@ -240,13 +243,13 @@ class RVC: and self.embedder.isHalf == half ): print( - "NOT NEED CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", + "USE EXSISTING PIPELINE", half, ) self.embedder.setDevice(dev) self.inferencer.setDevice(dev) else: - print("CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", half) + print("CHAGE TO NEW PIPELINE", half) self.prepareModel(self.settings.modelSlotIndex) elif key in self.settings.floatData: setattr(self.settings, key, float(val)) @@ -307,7 +310,9 @@ class RVC: ) raise NoModeLoadedException("model_common") if self.needSwitch: - print(f"Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}") + print( + f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}" + ) self.currentSlot = self.settings.modelSlotIndex self.switchModel() self.needSwitch = False @@ -363,8 +368,8 @@ class RVC: return result def __del__(self): - del self.net_g - del self.onnx_session + del self.inferencer + del self.embedder print("---------- REMOVING ---------------") @@ -383,57 +388,17 @@ class RVC: pass def export2onnx(self): - if hasattr(self, "net_g") is False or self.net_g is None: - print("[Voice Changer] export2onnx, No pyTorch session.") - return {"status": "ng", "path": ""} + modelSlot = self.settings.modelSlots[self.settings.modelSlotIndex] + pyTorchModelFile = modelSlot.pyTorchModelFile - pyTorchModelFile = self.settings.modelSlots[ - self.settings.modelSlotIndex - ].pyTorchModelFile # inference前にexportできるようにcurrentSlotではなくslot - - if pyTorchModelFile is None: + # PyTorchのファイルが存在しない場合はエラーを返す + if pyTorchModelFile is None or pyTorchModelFile == "": print("[Voice Changer] export2onnx, No pyTorch filepath.") return {"status": "ng", "path": ""} + import voice_changer.RVC.export2onnx as onnxExporter - output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx" - output_file_simple = ( - os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx" - ) - output_path = os.path.join(TMP_DIR, output_file) - output_path_simple = os.path.join(TMP_DIR, output_file_simple) - print( - "embChannels", - self.settings.modelSlots[self.settings.modelSlotIndex].embChannels, - ) - metadata = { - "application": "VC_CLIENT", - "version": "1", - "modelType": self.settings.modelSlots[ - self.settings.modelSlotIndex - ].modelType, - "samplingRate": self.settings.modelSlots[ - self.settings.modelSlotIndex - ].samplingRate, - "f0": self.settings.modelSlots[self.settings.modelSlotIndex].f0, - "embChannels": self.settings.modelSlots[ - self.settings.modelSlotIndex - ].embChannels, - "embedder": self.settings.modelSlots[self.settings.modelSlotIndex].embedder, - } - - if torch.cuda.device_count() > 0: - onnxExporter.export2onnx( - pyTorchModelFile, output_path, output_path_simple, True, metadata - ) - else: - print( - "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled." - ) - onnxExporter.export2onnx( - pyTorchModelFile, output_path, output_path_simple, False, metadata - ) - + output_file_simple = onnxExporter.export2onnx(modelSlot) return { "status": "ok", "path": f"/tmp/{output_file_simple}", diff --git a/server/voice_changer/RVC/custom_vc_infer_pipeline.py b/server/voice_changer/RVC/custom_vc_infer_pipeline.py index 70a7da49..ac79f678 100644 --- a/server/voice_changer/RVC/custom_vc_infer_pipeline.py +++ b/server/voice_changer/RVC/custom_vc_infer_pipeline.py @@ -55,7 +55,7 @@ class VC(object): pitchf, device=self.device, dtype=torch.float ).unsqueeze(0) - # tensor + # tensor型調整 feats = torch.from_numpy(audio_pad) if self.is_half is True: feats = feats.half() @@ -94,11 +94,9 @@ class VC(object): torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate + (1 - index_rate) * feats ) - - # feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) - # ピッチ抽出 + # ピッチサイズ調整 p_len = audio_pad.shape[0] // self.window if feats.shape[1] < p_len: p_len = feats.shape[1] @@ -108,47 +106,23 @@ class VC(object): p_len = torch.tensor([p_len], device=self.device).long() # 推論実行 - with torch.no_grad(): - audio1 = ( - (inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768) - .data.cpu() - .float() - .numpy() - .astype(np.int16) - ) - - # if pitch is not None: - # print("INFERENCE 1 ") - # audio1 = ( - # ( - # inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] - # * 32768 - # ) - # .data.cpu() - # .float() - # .numpy() - # .astype(np.int16) - # ) - # else: - # if hasattr(inferencer, "infer_pitchless"): - # print("INFERENCE 2 ") - - # audio1 = ( - # (inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768) - # .data.cpu() - # .float() - # .numpy() - # .astype(np.int16) - # ) - # else: - # print("INFERENCE 3 ") - # audio1 = ( - # (inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768) - # .data.cpu() - # .float() - # .numpy() - # .astype(np.int16) - # ) + try: + with torch.no_grad(): + audio1 = ( + ( + inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] + * 32768 + ) + .data.cpu() + .float() + .numpy() + .astype(np.int16) + ) + except RuntimeError as e: + if "HALF" in e.__str__().upper(): + raise HalfPrecisionChangingException() + else: + raise e del feats, p_len, padding_mask torch.cuda.empty_cache() diff --git a/server/voice_changer/RVC/export2onnx.py b/server/voice_changer/RVC/export2onnx.py index 1b3bb2e9..af968de6 100644 --- a/server/voice_changer/RVC/export2onnx.py +++ b/server/voice_changer/RVC/export2onnx.py @@ -1,7 +1,10 @@ +import os import json import torch from onnxsim import simplify import onnx +from const import TMP_DIR, EnumInferenceTypes +from voice_changer.RVC.ModelSlot import ModelSlot from voice_changer.RVC.onnx.SynthesizerTrnMs256NSFsid_ONNX import ( SynthesizerTrnMs256NSFsid_ONNX, @@ -15,24 +18,60 @@ from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsidNono_webui_ONNX import ( from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsid_webui_ONNX import ( SynthesizerTrnMsNSFsid_webui_ONNX, ) -from .const import RVC_MODEL_TYPE_RVC, RVC_MODEL_TYPE_WEBUI -def export2onnx(input_model, output_model, output_model_simple, is_half, metadata): +def export2onnx(modelSlot: ModelSlot): + pyTorchModelFile = modelSlot.pyTorchModelFile + + output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx" + output_file_simple = ( + os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx" + ) + output_path = os.path.join(TMP_DIR, output_file) + output_path_simple = os.path.join(TMP_DIR, output_file_simple) + metadata = { + "application": "VC_CLIENT", + "version": "2", + # ↓EnumInferenceTypesのままだとシリアライズできないのでテキスト化 + "modelType": modelSlot.modelType.value, + "samplingRate": modelSlot.samplingRate, + "f0": modelSlot.f0, + "embChannels": modelSlot.embChannels, + "embedder": modelSlot.embedder.value, + } + + if torch.cuda.device_count() > 0: + _export2onnx(pyTorchModelFile, output_path, output_path_simple, True, metadata) + else: + print( + "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled." + ) + _export2onnx(pyTorchModelFile, output_path, output_path_simple, False, metadata) + return output_file_simple + + +def _export2onnx(input_model, output_model, output_model_simple, is_half, metadata): cpt = torch.load(input_model, map_location="cpu") if is_half: dev = torch.device("cuda", index=0) else: dev = torch.device("cpu") - if metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_RVC: + # EnumInferenceTypesのままだとシリアライズできないのでテキスト化 + if metadata["modelType"] == EnumInferenceTypes.pyTorchRVC.value: net_g_onnx = SynthesizerTrnMs256NSFsid_ONNX(*cpt["config"], is_half=is_half) - elif metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI: + elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUI.value: net_g_onnx = SynthesizerTrnMsNSFsid_webui_ONNX(**cpt["params"], is_half=is_half) - elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_RVC: + elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCNono.value: net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"]) - elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI: + elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUINono.value: net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"]) + else: + print( + "unknwon::::: ", + metadata["modelType"], + EnumInferenceTypes.pyTorchWebUI.value, + ) net_g_onnx.eval().to(dev) net_g_onnx.load_state_dict(cpt["weight"], strict=False) diff --git a/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py b/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py index 91873b89..91d60200 100644 --- a/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py +++ b/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py @@ -24,6 +24,7 @@ class OnnxRVCInferencer(Inferencer): self.isHalf = True self.model = onnx_session + self.setDevice(dev) return self def infer( @@ -37,9 +38,9 @@ class OnnxRVCInferencer(Inferencer): if pitch is None or pitchf is None: raise RuntimeError("[Voice Changer] Pitch or Pitchf is not found.") - print("INFER1", self.model.get_providers()) - print("INFER2", self.model.get_provider_options()) - print("INFER3", self.model.get_session_options()) + # print("INFER1", self.model.get_providers()) + # print("INFER2", self.model.get_provider_options()) + # print("INFER3", self.model.get_session_options()) if self.isHalf: audio1 = self.model.run( ["audio"], diff --git a/server/voice_changer/RVC/inferencer/RVCInferencer.py b/server/voice_changer/RVC/inferencer/RVCInferencer.py index 91bbf636..d62f89bb 100644 --- a/server/voice_changer/RVC/inferencer/RVCInferencer.py +++ b/server/voice_changer/RVC/inferencer/RVCInferencer.py @@ -11,6 +11,7 @@ from infer_pack.models import ( # type:ignore class RVCInferencer(Inferencer): def loadModel(self, file: str, dev: device, isHalf: bool = True): super().setProps(EnumInferenceTypes.pyTorchRVC, file, dev, isHalf) + print("load inf", file) cpt = torch.load(file, map_location="cpu") model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)