diff --git a/server/.vscode/settings.json b/server/.vscode/settings.json
index 88a7b800..810d061e 100644
--- a/server/.vscode/settings.json
+++ b/server/.vscode/settings.json
@@ -9,7 +9,7 @@
         "editor.formatOnSave": true // Auto-format on file save
     },
     "flake8.args": [
-        "--ignore=E501"
+        "--ignore=E501,E402,W503"
         // "--max-line-length=150",
        // "--max-complexity=20"
    ]
diff --git a/server/Exceptions.py b/server/Exceptions.py
index 82819c8d..2fc4a21e 100644
--- a/server/Exceptions.py
+++ b/server/Exceptions.py
@@ -1,12 +1,13 @@
-
 class NoModeLoadedException(Exception):
     def __init__(self, framework):
         self.framework = framework
 
     def __str__(self):
-        return repr(f"No model for {self.framework} loaded. Please confirm the model uploaded.")
+        return repr(
+            f"No model for {self.framework} loaded. Please confirm the model uploaded."
+        )
 
 
 class ONNXInputArgumentException(Exception):
     def __str__(self):
-        return repr(f"ONNX received invalid argument.")
+        return repr("ONNX received invalid argument.")
diff --git a/server/const.py b/server/const.py
index fe877166..7f8e406c 100644
--- a/server/const.py
+++ b/server/const.py
@@ -4,7 +4,15 @@ import tempfile
 
 from typing import Literal, TypeAlias
 
-ModelType: TypeAlias = Literal['MMVCv15', 'MMVCv13', 'so-vits-svc-40v2', 'so-vits-svc-40', 'so-vits-svc-40_c', 'DDSP-SVC', 'RVC']
+ModelType: TypeAlias = Literal[
+    "MMVCv15",
+    "MMVCv13",
+    "so-vits-svc-40v2",
+    "so-vits-svc-40",
+    "so-vits-svc-40_c",
+    "DDSP-SVC",
+    "RVC",
+]
 
 ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION"
 
@@ -13,19 +21,45 @@ tmpdir = tempfile.TemporaryDirectory()
 # print("generate tmpdir:::",tmpdir)
 SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
 MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
-UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
-NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"
-NATIVE_CLIENT_FILE_MAC = os.path.join(sys._MEIPASS, "voice-changer-native-client.app", "Contents", "MacOS",
-                                      "voice-changer-native-client") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"
+UPLOAD_DIR = (
+    os.path.join(tmpdir.name, "upload_dir")
+    if hasattr(sys, "_MEIPASS")
+    else "upload_dir"
+)
+NATIVE_CLIENT_FILE_WIN = (
+    os.path.join(sys._MEIPASS, "voice-changer-native-client.exe")  # type: ignore
+    if hasattr(sys, "_MEIPASS")
+    else "voice-changer-native-client"
+)
+NATIVE_CLIENT_FILE_MAC = (
+    os.path.join(
+        sys._MEIPASS,  # type: ignore
+        "voice-changer-native-client.app",
+        "Contents",
+        "MacOS",
+        "voice-changer-native-client",
+    )
+    if hasattr(sys, "_MEIPASS")
+    else "voice-changer-native-client"
+)
 
-HUBERT_ONNX_MODEL_PATH = os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") if hasattr(sys,
-                                                                                                  "_MEIPASS") else "model_hubert/hubert_simple.onnx"
+HUBERT_ONNX_MODEL_PATH = (
+    os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx")  # type: ignore
+    if hasattr(sys, "_MEIPASS")
+    else "model_hubert/hubert_simple.onnx"
+)
 
-TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
+TMP_DIR = (
+    os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
+)
 os.makedirs(TMP_DIR, exist_ok=True)
 
 
 def getFrontendPath():
-    frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
+    frontend_path = (
+        os.path.join(sys._MEIPASS, "dist")
+        if hasattr(sys, "_MEIPASS")
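
The `hasattr(sys, "_MEIPASS")` conditionals above exist because PyInstaller's one-file mode unpacks bundled resources into a temporary directory exposed as `sys._MEIPASS`; outside a bundle the attribute is absent and the relative source-tree paths apply. A minimal sketch of the same dispatch, factored into a hypothetical `resource_path` helper (const.py itself inlines the conditional for each constant):

import os
import sys


def resource_path(relative: str) -> str:
    # Inside a PyInstaller bundle, sys._MEIPASS is the extraction directory.
    # getattr with a default also avoids the "# type: ignore" comments that
    # direct attribute access needs, since sys has no _MEIPASS annotation.
    base = getattr(sys, "_MEIPASS", None)
    return os.path.join(base, relative) if base is not None else relative


HUBERT_ONNX_MODEL_PATH = resource_path("model_hubert/hubert_simple.onnx")
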
else "../client/demo/dist" + ) return frontend_path diff --git a/server/restapi/MMVC_Rest_Fileuploader.py b/server/restapi/MMVC_Rest_Fileuploader.py index fa1b5a93..a0b247eb 100644 --- a/server/restapi/MMVC_Rest_Fileuploader.py +++ b/server/restapi/MMVC_Rest_Fileuploader.py @@ -10,6 +10,9 @@ from restapi.mods.FileUploader import upload_file, concat_file_chunks from voice_changer.VoiceChangerManager import VoiceChangerManager from const import MODEL_DIR, UPLOAD_DIR, ModelType +from voice_changer.utils.LoadModelParams import FilePaths, LoadModelParams + +from dataclasses import fields os.makedirs(UPLOAD_DIR, exist_ok=True) os.makedirs(MODEL_DIR, exist_ok=True) @@ -30,12 +33,6 @@ class MMVC_Rest_Fileuploader: "/update_settings", self.post_update_settings, methods=["POST"] ) self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"]) - self.router.add_api_route( - "/load_model_for_train", self.post_load_model_for_train, methods=["POST"] - ) - self.router.add_api_route( - "/extract_voices", self.post_extract_voices, methods=["POST"] - ) self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"]) self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"]) self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"]) @@ -80,74 +77,42 @@ class MMVC_Rest_Fileuploader: isHalf: bool = Form(...), params: str = Form(...), ): - props = { - "slot": slot, - "isHalf": isHalf, - "files": { - "configFilename": configFilename, - "pyTorchModelFilename": pyTorchModelFilename, - "onnxModelFilename": onnxModelFilename, - "clusterTorchModelFilename": clusterTorchModelFilename, - "featureFilename": featureFilename, - "indexFilename": indexFilename, - }, - "params": params, - } + files = FilePaths( + configFilename=configFilename, + pyTorchModelFilename=pyTorchModelFilename, + onnxModelFilename=onnxModelFilename, + clusterTorchModelFilename=clusterTorchModelFilename, + featureFilename=featureFilename, + indexFilename=indexFilename, + ) + props: LoadModelParams = LoadModelParams( + slot=slot, isHalf=isHalf, params=params, files=files + ) + # Change Filepath - for key, val in props["files"].items(): + for field in fields(props.files): + key = field.name + val = getattr(props.files, key) if val != "-": uploadPath = os.path.join(UPLOAD_DIR, val) storeDir = os.path.join(UPLOAD_DIR, f"{slot}") os.makedirs(storeDir, exist_ok=True) storePath = os.path.join(storeDir, val) shutil.move(uploadPath, storePath) - props["files"][key] = storePath + setattr(props.files, key, storePath) else: - props["files"][key] = None - # print("---------------------------------------------------2>", props) + setattr(props.files, key, None) info = self.voiceChangerManager.loadModel(props) json_compatible_item_data = jsonable_encoder(info) return JSONResponse(content=json_compatible_item_data) - # return {"load": f"{configFilePath}, {pyTorchModelFilePath}, {onnxModelFilePath}"} - def post_load_model_for_train( - self, - modelGFilename: str = Form(...), - modelGFilenameChunkNum: int = Form(...), - modelDFilename: str = Form(...), - modelDFilenameChunkNum: int = Form(...), - ): - modelGFilePath = concat_file_chunks( - UPLOAD_DIR, modelGFilename, modelGFilenameChunkNum, MODEL_DIR - ) - modelDFilePath = concat_file_chunks( - UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR - ) - return {"File saved": f"{modelGFilePath}, {modelDFilePath}"} - - def post_extract_voices( - self, - zipFilename: str = Form(...), - zipFileChunkNum: int = Form(...), - ): - zipFilePath = 
diff --git a/server/sio/MMVC_Namespace.py b/server/sio/MMVC_Namespace.py
index f19293f6..ce4088f7 100644
--- a/server/sio/MMVC_Namespace.py
+++ b/server/sio/MMVC_Namespace.py
@@ -30,7 +30,6 @@ class MMVC_Namespace(socketio.AsyncNamespace):
         else:
             unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
diff --git a/server/voice_changer/RVC/RVC.py b/server/voice_changer/RVC/RVC.py
--- a/server/voice_changer/RVC/RVC.py
+++ b/server/voice_changer/RVC/RVC.py
@@ ... @@ class RVC:
                 ... >= self.gpu_num:
                     self.settings.gpu = 0
@@ -345,7 +344,7 @@
                 key == "gpu"
                 and val >= 0
                 and val < self.gpu_num
-                and self.onnx_session != None
+                and self.onnx_session is not None
             ):
                 providers = self.onnx_session.get_providers()
                 print("Providers:", providers)
@@ -374,11 +373,11 @@ class RVC:
         data = asdict(self.settings)
 
         data["onnxExecutionProviders"] = (
-            self.onnx_session.get_providers() if self.onnx_session != None else []
+            self.onnx_session.get_providers() if self.onnx_session is not None else []
         )
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
-            if data[f] != None and os.path.exists(data[f]):
+            if data[f] is not None and os.path.exists(data[f]):
                 data[f] = os.path.basename(data[f])
             else:
                 data[f] = ""
@@ -477,7 +476,7 @@ class RVC:
         return result
 
     def _pyTorch_inference(self, data):
-        if hasattr(self, "net_g") == False or self.net_g == None:
+        if hasattr(self, "net_g") is False or self.net_g is None:
             print(
                 "[Voice Changer] No pyTorch session.",
                 hasattr(self, "net_g"),
@@ -485,7 +484,7 @@ class RVC:
             )
             raise NoModeLoadedException("pytorch")
 
-        if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled == False):
+        if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled is False):
             dev = torch.device("cpu")
         elif self.mps_enabled:
             dev = torch.device("mps")
diff --git a/server/voice_changer/RVC/const.py b/server/voice_changer/RVC/const.py
index 561b9d4e..205a9ef3 100644
--- a/server/voice_changer/RVC/const.py
+++ b/server/voice_changer/RVC/const.py
@@ -1,10 +1,2 @@
-# RVC_MODEL_TYPE_NORMAL = 0
-# RVC_MODEL_TYPE_PITCHLESS = 1
-# RVC_MODEL_TYPE_WEBUI_256_NORMAL = 2
-# RVC_MODEL_TYPE_WEBUI_256_PITCHLESS = 3
-# RVC_MODEL_TYPE_WEBUI_768_NORMAL = 4
-# RVC_MODEL_TYPE_WEBUI_768_PITCHLESS = 5
-# RVC_MODEL_TYPE_UNKNOWN = 99
-
 RVC_MODEL_TYPE_RVC = 0
 RVC_MODEL_TYPE_WEBUI = 1
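
Most of the churn in RVC.py above is mechanical: flake8's E711 and E712 flag equality comparisons against `None`, `True`, and `False`, which are singletons and should be tested by identity. Equality can be spoofed by a custom `__eq__`; identity cannot. A small illustration:

class AlwaysEqual:
    def __eq__(self, other):  # equality is overridable...
        return True


obj = AlwaysEqual()
assert obj == None  # noqa: E711 - True, because __eq__ lies
assert obj is not None  # identity sees the actual object
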
diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py
index f60045ec..f38aa4e0 100755
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Optional, Protocol, TypeAlias, Union, cast
+from typing import Any, Union, cast
 from const import TMP_DIR, ModelType
 import torch
 import os
@@ -9,6 +9,7 @@ import resampy
 
 from voice_changer.IORecorder import IORecorder
 
+from voice_changer.utils.LoadModelParams import LoadModelParams
 from voice_changer.utils.Timer import Timer
 from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
 
@@ -24,8 +25,6 @@ providers = [
 
 STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
 STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
-STREAM_ANALYZE_FILE_DIO = os.path.join(TMP_DIR, "analyze-dio.png")
-STREAM_ANALYZE_FILE_HARVEST = os.path.join(TMP_DIR, "analyze-harvest.png")
 
 
 @dataclass
@@ -51,18 +50,20 @@ class VoiceChangerSettings:
 class VoiceChanger:
     settings: VoiceChangerSettings
     voiceChanger: VoiceChangerModel
+    ioRecorder: IORecorder
+    sola_buffer: AudioInOut
 
     def __init__(self, params: VoiceChangerParams):  # initialization
         self.settings = VoiceChangerSettings()
         self.onnx_session = None
-        self.currentCrossFadeOffsetRate = 0
-        self.currentCrossFadeEndRate = 0
+        self.currentCrossFadeOffsetRate = 0.0
+        self.currentCrossFadeEndRate = 0.0
         self.currentCrossFadeOverlapSize = 0  # setting
         self.crossfadeSize = 0  # calculated
 
         self.voiceChanger = None
-        self.modelType = None
+        self.modelType: ModelType | None = None
         self.params = params
         self.gpu_num = torch.cuda.device_count()
         self.prev_audio = np.zeros(4096)
 
@@ -76,7 +77,7 @@
         )
 
     def switchModelType(self, modelType: ModelType):
-        if hasattr(self, "voiceChanger") and self.voiceChanger != None:
+        if hasattr(self, "voiceChanger") and self.voiceChanger is not None:
             # return {"status": "ERROR", "msg": "vc is already selected. currently re-select is not implemented"}
             del self.voiceChanger
             self.voiceChanger = None
@@ -114,34 +115,18 @@
         return {"status": "OK", "msg": "vc is switched."}
 
     def getModelType(self):
-        if self.modelType != None:
+        if self.modelType is not None:
             return {"status": "OK", "vc": self.modelType}
         else:
             return {"status": "OK", "vc": "none"}
 
-    def loadModel(
-        self,
-        props,
-    ):
+    def loadModel(self, props: LoadModelParams):
         try:
             return self.voiceChanger.loadModel(props)
         except Exception as e:
             print("[Voice Changer] Model Load Error! Check your model is valid.", e)
             return {"status": "NG"}
 
-        # try:
-        #     if self.modelType == "MMVCv15" or self.modelType == "MMVCv13":
-        #         return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file)
-        #     elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c" or self.modelType == "so-vits-svc-40v2":
-        #         return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, clusterTorchModel)
-        #     elif self.modelType == "RVC":
-        #         return self.voiceChanger.loadModel(slot, config, pyTorch_model_file, onnx_model_file, feature_file, index_file, is_half)
-        #     else:
-        #         return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, clusterTorchModel)
-        # except Exception as e:
-        #     print("[Voice Changer] Model Load Error! Check your model is valid.", e)
-        #     return {"status": "NG"}
-
     def get_info(self):
         data = asdict(self.settings)
         if hasattr(self, "voiceChanger"):
@@ -167,14 +152,6 @@
             if hasattr(self, "ioRecorder"):
                 self.ioRecorder.close()
-            # if hasattr(self, "ioAnalyzer") == False:
-            #     self.ioAnalyzer = IOAnalyzer()
-
-            # try:
-            #     self.ioAnalyzer.analyze(STREAM_INPUT_FILE, STREAM_ANALYZE_FILE_DIO, STREAM_ANALYZE_FILE_HARVEST, self.settings.inputSampleRate)
-
-            # except Exception as e:
-            #     print("recordIO exception", e)
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
         elif key in self.settings.strData:
@@ -182,10 +159,10 @@
         else:
             if hasattr(self, "voiceChanger"):
                 ret = self.voiceChanger.update_settings(key, val)
-                if ret == False:
+                if ret is False:
                     print(f"{key} is not mutable variable or unknown variable!")
             else:
-                print(f"voice changer is not initialized!")
+                print("voice changer is not initialized!")
         return self.get_info()
 
     def _generate_strength(self, crossfadeSize: int):
@@ -228,9 +205,9 @@
             )
 
             # The size differs from the previous result, so clear the recorded state.
-            if hasattr(self, "np_prev_audio1") == True:
+            if hasattr(self, "np_prev_audio1") is True:
                 delattr(self, "np_prev_audio1")
-            if hasattr(self, "sola_buffer"):
+            if hasattr(self, "sola_buffer") is True:
                 del self.sola_buffer
 
     # receivedData: tuple of short
@@ -275,9 +252,14 @@
         # Inference
         audio = self.voiceChanger.inference(data)
 
-        if hasattr(self, "sola_buffer") == True:
+        if hasattr(self, "sola_buffer") is True:
             np.set_printoptions(threshold=10000)
-            audio = audio[-sola_search_frame - crossfade_frame - block_frame :]
+            audio_offset = -1 * (
+                sola_search_frame + crossfade_frame + block_frame
+            )
+            audio = audio[audio_offset:]
+            a = 0
+            audio = audio[a:]
             # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
             cor_nom = np.convolve(
                 audio[: crossfade_frame + sola_search_frame],
@@ -292,11 +274,9 @@
                 )
                 + 1e-3
             )
-            sola_offset = np.argmax(cor_nom / cor_den)
-
-            output_wav = audio[sola_offset : sola_offset + block_frame].astype(
-                np.float64
-            )
+            sola_offset = int(np.argmax(cor_nom / cor_den))
+            sola_end = sola_offset + block_frame
+            output_wav = audio[sola_offset:sola_end].astype(np.float64)
             output_wav[:crossfade_frame] *= self.np_cur_strength
             output_wav[:crossfade_frame] += self.sola_buffer[:]
 
@@ -306,15 +286,12 @@
             result = np.zeros(4096).astype(np.int16)
 
         if (
-            hasattr(self, "sola_buffer") == True
+            hasattr(self, "sola_buffer") is True
             and sola_offset < sola_search_frame
         ):
-            sola_buf_org = audio[
-                -sola_search_frame
-                - crossfade_frame
-                + sola_offset : -sola_search_frame
-                + sola_offset
-            ]
+            offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
+            end = -1 * (sola_search_frame - sola_offset)
+            sola_buf_org = audio[offset:end]
             self.sola_buffer = sola_buf_org * self.np_prev_strength
         else:
             self.sola_buffer = audio[-crossfade_frame:] * self.np_prev_strength
@@ -379,7 +356,7 @@ PRINT_CONVERT_PROCESSING: bool = False
 
 
 def print_convert_processing(mess: str):
-    if PRINT_CONVERT_PROCESSING == True:
+    if PRINT_CONVERT_PROCESSING is True:
         print(mess)
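
The SOLA hunks above mostly rename long negative-slice expressions into explicit `offset`/`end` variables without changing behavior; the patch's guard `sola_offset < sola_search_frame` keeps `end` strictly negative, so the slices stay well formed. A quick equivalence check with arbitrary toy sizes (names mirror the patch, the values are made up):

import numpy as np

sola_search_frame, crossfade_frame, sola_offset = 160, 320, 40
audio = np.arange(4096)

old = audio[-sola_search_frame - crossfade_frame + sola_offset : -sola_search_frame + sola_offset]
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
end = -1 * (sola_search_frame - sola_offset)
assert np.array_equal(old, audio[offset:end])
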
diff --git a/server/voice_changer/VoiceChangerManager.py b/server/voice_changer/VoiceChangerManager.py
index 03bb6149..080afa7c 100644
--- a/server/voice_changer/VoiceChangerManager.py
+++ b/server/voice_changer/VoiceChangerManager.py
@@ -1,6 +1,8 @@
 import numpy as np
 from voice_changer.VoiceChanger import VoiceChanger
 from const import ModelType
+from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 
 
@@ -15,7 +17,7 @@ class VoiceChangerManager(object):
             cls._instance.voiceChanger = VoiceChanger(params)
         return cls._instance
 
-    def loadModel(self, props):
+    def loadModel(self, props: LoadModelParams):
         info = self.voiceChanger.loadModel(props)
         if hasattr(info, "status") and info["status"] == "NG":
             return info
@@ -31,7 +33,7 @@ class VoiceChangerManager(object):
         else:
             return {"status": "ERROR", "msg": "no model loaded"}
 
-    def update_settings(self, key: str, val: any):
+    def update_settings(self, key: str, val: str | int | float):
         if hasattr(self, "voiceChanger"):
             info = self.voiceChanger.update_settings(key, val)
             info["status"] = "OK"
@@ -39,7 +41,7 @@ class VoiceChangerManager(object):
         else:
             return {"status": "ERROR", "msg": "no model loaded"}
 
-    def changeVoice(self, receivedData: any):
+    def changeVoice(self, receivedData: AudioInOut):
         if hasattr(self, "voiceChanger") is True:
             return self.voiceChanger.on_request(receivedData)
         else:
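
Replacing `val: any` with `val: str | int | float` also fixes a latent annotation bug: lowercase `any` is the builtin function, not `typing.Any`, so the old hint meant nothing to a type checker. The settings objects then dispatch on per-type name lists (`intData`, `floatData`, `strData` in VoiceChanger.update_settings above) and coerce the incoming form value. A sketch of that pattern with an invented two-field settings class:

from dataclasses import dataclass, field


@dataclass
class Settings:
    gpu: int = 0
    crossFadeEndRate: float = 0.9
    intData: list[str] = field(default_factory=lambda: ["gpu"])
    floatData: list[str] = field(default_factory=lambda: ["crossFadeEndRate"])


def update_settings(s: Settings, key: str, val: str | int | float) -> bool:
    # Values arrive from HTTP forms as strings; coerce by declared bucket.
    if key in s.intData:
        setattr(s, key, int(val))
    elif key in s.floatData:
        setattr(s, key, float(val))
    else:
        return False
    return True


assert update_settings(Settings(), "gpu", "1") is True
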
diff --git a/server/voice_changer/utils/LoadModelParams.py b/server/voice_changer/utils/LoadModelParams.py
new file mode 100644
index 00000000..dd99f8ec
--- /dev/null
+++ b/server/voice_changer/utils/LoadModelParams.py
@@ -0,0 +1,19 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class FilePaths:
+    configFilename: str
+    pyTorchModelFilename: str
+    onnxModelFilename: str
+    clusterTorchModelFilename: str
+    featureFilename: str
+    indexFilename: str
+
+
+@dataclass
+class LoadModelParams:
+    slot: int
+    isHalf: bool
+    files: FilePaths
+    params: str
diff --git a/server/voice_changer/utils/VoiceChangerModel.py b/server/voice_changer/utils/VoiceChangerModel.py
index cebe1a99..d2ca6410 100644
--- a/server/voice_changer/utils/VoiceChangerModel.py
+++ b/server/voice_changer/utils/VoiceChangerModel.py
@@ -1,14 +1,30 @@
-from typing import Any, Callable, Protocol, TypeAlias
+from typing import Any, Protocol, TypeAlias
 import numpy as np
 
+from voice_changer.utils.LoadModelParams import LoadModelParams
+
 AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
 
 
 class VoiceChangerModel(Protocol):
-    loadModel: Callable[..., dict[str, Any]]
-    def get_processing_sampling_rate(self) -> int: ...
-    def get_info(self) -> dict[str, Any]: ...
-    def inference(self, data: tuple[Any, ...]) -> Any: ...
-    def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]: ...
-    def update_settings(self, key: str, val: Any) -> bool: ...
+    # loadModel: Callable[..., dict[str, Any]]
+    def loadModel(self, params: LoadModelParams):
+        ...
+
+    def get_processing_sampling_rate(self) -> int:
+        ...
+
+    def get_info(self) -> dict[str, Any]:
+        ...
+
+    def inference(self, data: tuple[Any, ...]) -> Any:
+        ...
+
+    def generate_input(
+        self, newData: AudioInOut, inputSize: int, crossfadeSize: int
+    ) -> tuple[Any, ...]:
+        ...
+
+    def update_settings(self, key: str, val: Any) -> bool:
+        ...
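
Rewriting `VoiceChangerModel` from a `Callable` attribute plus one-line stubs into ordinary `def` declarations keeps it a structural `typing.Protocol`: the concrete voice changers never inherit from it, they only have to expose matching methods. A toy demonstration of that structural check:

from typing import Any, Protocol


class Model(Protocol):
    def get_info(self) -> dict[str, Any]:
        ...


class DummyModel:  # deliberately no inheritance from Model
    def get_info(self) -> dict[str, Any]:
        return {"name": "dummy"}


def describe(m: Model) -> str:
    # A type checker accepts this call because DummyModel matches structurally.
    return str(m.get_info())


print(describe(DummyModel()))
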