From c96609640d2b08cbfcf1d7f3745e66288b1e3fbf Mon Sep 17 00:00:00 2001
From: wataru
Date: Fri, 28 Apr 2023 13:49:40 +0900
Subject: [PATCH] WIP: refactoring

---
 server/MMVCServerSIO.py                       |   3 +
 server/misc/log_control.py                    |  39 ++--
 server/voice_changer/MMVCv13/MMVCv13.py       | 162 ++++++++++-----
 .../voice_changer/MMVCv13/TrainerFunctions.py | 192 +++++++++++-------
 server/voice_changer/VoiceChanger.py          |   1 +
 5 files changed, 249 insertions(+), 148 deletions(-)

diff --git a/server/MMVCServerSIO.py b/server/MMVCServerSIO.py
index 3a5ff55c..808a3bcc 100755
--- a/server/MMVCServerSIO.py
+++ b/server/MMVCServerSIO.py
@@ -16,6 +16,9 @@ from restapi.MMVC_Rest import MMVC_Rest
 from const import NATIVE_CLIENT_FILE_MAC, NATIVE_CLIENT_FILE_WIN, SSL_KEY_DIR
 import subprocess
 import multiprocessing as mp
+from misc.log_control import setup_loggers
+
+setup_loggers()
 
 
 def setupArgParser():
diff --git a/server/misc/log_control.py b/server/misc/log_control.py
index 6cf4cb36..20d26982 100644
--- a/server/misc/log_control.py
+++ b/server/misc/log_control.py
@@ -8,32 +8,31 @@ class UvicornSuppressFilter(logging.Filter):
         return False
 
 
-# logger = logging.getLogger("uvicorn.error")
-# logger.addFilter(UvicornSuppressFilter())
+def setup_loggers():
+    # logger = logging.getLogger("uvicorn.error")
+    # logger.addFilter(UvicornSuppressFilter())
 
-logger = logging.getLogger("fairseq.tasks.hubert_pretraining")
-logger.addFilter(UvicornSuppressFilter())
+    logger = logging.getLogger("fairseq.tasks.hubert_pretraining")
+    logger.addFilter(UvicornSuppressFilter())
 
-logger = logging.getLogger("fairseq.models.hubert.hubert")
-logger.addFilter(UvicornSuppressFilter())
+    logger = logging.getLogger("fairseq.models.hubert.hubert")
+    logger.addFilter(UvicornSuppressFilter())
 
-logger = logging.getLogger("fairseq.tasks.text_to_speech")
-logger.addFilter(UvicornSuppressFilter())
+    logger = logging.getLogger("fairseq.tasks.text_to_speech")
+    logger.addFilter(UvicornSuppressFilter())
 
+    logger = logging.getLogger("numba.core.ssa")
+    logger.addFilter(UvicornSuppressFilter())
 
-logger = logging.getLogger("numba.core.ssa")
-logger.addFilter(UvicornSuppressFilter())
+    logger = logging.getLogger("numba.core.interpreter")
+    logger.addFilter(UvicornSuppressFilter())
 
-logger = logging.getLogger("numba.core.interpreter")
-logger.addFilter(UvicornSuppressFilter())
+    logger = logging.getLogger("numba.core.byteflow")
+    logger.addFilter(UvicornSuppressFilter())
 
-logger = logging.getLogger("numba.core.byteflow")
-logger.addFilter(UvicornSuppressFilter())
+    # logger.propagate = False
+    logger = logging.getLogger("multipart.multipart")
+    logger.propagate = False
 
-# logger.propagate = False
-
-logger = logging.getLogger("multipart.multipart")
-logger.propagate = False
-
-logging.getLogger('asyncio').setLevel(logging.WARNING)
+    logging.getLogger("asyncio").setLevel(logging.WARNING)
diff --git a/server/voice_changer/MMVCv13/MMVCv13.py b/server/voice_changer/MMVCv13/MMVCv13.py
index 8794f54b..8f6c9d33 100644
--- a/server/voice_changer/MMVCv13/MMVCv13.py
+++ b/server/voice_changer/MMVCv13/MMVCv13.py
@@ -1,6 +1,10 @@
 import sys
 import os
-if sys.platform.startswith('darwin'):
+
+from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.VoiceChangerModel import AudioInOut
+
+if sys.platform.startswith("darwin"):
     baseDir = [x for x in sys.path if x.endswith("Contents/MacOS")]
     if len(baseDir) != 1:
         print("baseDir should be only one ", baseDir)
@@ -12,23 +16,32 @@ else:
     sys.path.append(modulePath)
 
 
-from dataclasses import dataclass, asdict
+from dataclasses import dataclass, asdict, field
 import numpy as np
 import torch
 import onnxruntime
-import pyworld as pw
-from symbols import symbols
-from models import SynthesizerTrn
-from voice_changer.MMVCv13.TrainerFunctions import TextAudioSpeakerCollate, spectrogram_torch, load_checkpoint, get_hparams_from_file
+from symbols import symbols  # type:ignore
+from models import SynthesizerTrn  # type:ignore
+from voice_changer.MMVCv13.TrainerFunctions import (
+    TextAudioSpeakerCollate,
+    spectrogram_torch,
+    load_checkpoint,
+    get_hparams_from_file,
+)
 from Exceptions import NoModeLoadedException
 
-providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
+providers = [
+    "OpenVINOExecutionProvider",
+    "CUDAExecutionProvider",
+    "DmlExecutionProvider",
+    "CPUExecutionProvider",
+]
 
 
 @dataclass
-class MMVCv13Settings():
+class MMVCv13Settings:
     gpu: int = 0
     srcId: int = 0
     dstId: int = 101
@@ -40,11 +53,13 @@ class MMVCv13Settings():
 
     # ↓mutableな物だけ列挙
     intData = ["gpu", "srcId", "dstId"]
-    floatData = []
+    floatData: list[str] = field(default_factory=lambda: [])
     strData = ["framework"]
 
 
 class MMVCv13:
+    audio_buffer: AudioInOut | None = None
+
     def __init__(self):
         self.settings = MMVCv13Settings()
         self.net_g = None
@@ -53,51 +68,62 @@ class MMVCv13:
         self.gpu_num = torch.cuda.device_count()
         self.text_norm = torch.LongTensor([0, 6, 0])
 
-    def loadModel(self, props):
-        self.settings.configFile = props["files"]["configFilename"]
+    def loadModel(self, props: LoadModelParams):
+        self.settings.configFile = props.files.configFilename
         self.hps = get_hparams_from_file(self.settings.configFile)
 
-        self.settings.pyTorchModelFile = props["files"]["pyTorchModelFilename"]
-        self.settings.onnxModelFile = props["files"]["onnxModelFilename"]
+        self.settings.pyTorchModelFile = props.files.pyTorchModelFilename
+        self.settings.onnxModelFile = props.files.onnxModelFilename
 
         # PyTorchモデル生成
-        if self.settings.pyTorchModelFile != None:
+        if self.settings.pyTorchModelFile is not None:
             self.net_g = SynthesizerTrn(
                 len(symbols),
                 self.hps.data.filter_length // 2 + 1,
                 self.hps.train.segment_size // self.hps.data.hop_length,
                 n_speakers=self.hps.data.n_speakers,
-                **self.hps.model)
+                **self.hps.model
+            )
             self.net_g.eval()
             load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)
 
         # ONNXモデル生成
-        if self.settings.onnxModelFile != None:
+        if self.settings.onnxModelFile is not None:
             ort_options = onnxruntime.SessionOptions()
             ort_options.intra_op_num_threads = 8
             self.onnx_session = onnxruntime.InferenceSession(
-                self.settings.onnxModelFile,
-                providers=providers
+                self.settings.onnxModelFile, providers=providers
             )
         return self.get_info()
 
-    def update_settings(self, key: str, val: any):
-        if key == "onnxExecutionProvider" and self.onnx_session != None:
+    def update_settings(self, key: str, val: int | float | str):
+        if key == "onnxExecutionProvider" and self.onnx_session is not None:
             if val == "CUDAExecutionProvider":
                 if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
                     self.settings.gpu = 0
-                provider_options = [{'device_id': self.settings.gpu}]
-                self.onnx_session.set_providers(providers=[val], provider_options=provider_options)
+                provider_options = [{"device_id": self.settings.gpu}]
+                self.onnx_session.set_providers(
+                    providers=[val], provider_options=provider_options
+                )
             else:
                 self.onnx_session.set_providers(providers=[val])
         elif key in self.settings.intData:
-            setattr(self.settings, key, int(val))
-            if key == "gpu" and val >= 0 and val < self.gpu_num and self.onnx_session != None:
+            val = int(val)
+            setattr(self.settings, key, val)
+            if (
+                key == "gpu"
+                and val >= 0
+                and val < self.gpu_num
+                and self.onnx_session is not None
+            ):
                 providers = self.onnx_session.get_providers()
                 print("Providers:", providers)
                 if "CUDAExecutionProvider" in providers:
-                    provider_options = [{'device_id': self.settings.gpu}]
-                    self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
+                    provider_options = [{"device_id": self.settings.gpu}]
+                    self.onnx_session.set_providers(
+                        providers=["CUDAExecutionProvider"],
+                        provider_options=provider_options,
+                    )
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
         elif key in self.settings.strData:
@@ -110,10 +136,12 @@ class MMVCv13:
 
     def get_info(self):
         data = asdict(self.settings)
-        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session != None else []
+        data["onnxExecutionProviders"] = (
+            self.onnx_session.get_providers() if self.onnx_session is not None else []
+        )
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
-            if data[f] != None and os.path.exists(data[f]):
+            if data[f] is not None and os.path.exists(data[f]):
                 data[f] = os.path.basename(data[f])
             else:
                 data[f] = ""
@@ -121,22 +149,35 @@ class MMVCv13:
         return data
 
     def get_processing_sampling_rate(self):
-        if hasattr(self, "hps") == False:
+        if not hasattr(self, "hps"):
             raise NoModeLoadedException("config")
         return self.hps.data.sampling_rate
 
-    def _get_spec(self, audio: any):
-        spec = spectrogram_torch(audio, self.hps.data.filter_length,
-                                 self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
-                                 center=False)
+    def _get_spec(self, audio: AudioInOut):
+        spec = spectrogram_torch(
+            audio,
+            self.hps.data.filter_length,
+            self.hps.data.sampling_rate,
+            self.hps.data.hop_length,
+            self.hps.data.win_length,
+            center=False,
+        )
         spec = torch.squeeze(spec, 0)
         return spec
 
-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
+    def generate_input(
+        self,
+        newData: AudioInOut,
+        inputSize: int,
+        crossfadeSize: int,
+        solaSearchFrame: int = 0,
+    ):
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
-        if hasattr(self, "audio_buffer"):
-            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # 過去のデータに連結
+        if self.audio_buffer is not None:
+            self.audio_buffer = np.concatenate(
+                [self.audio_buffer, newData], 0
+            )  # concatenate onto the previously buffered audio
         else:
             self.audio_buffer = newData
 
@@ -145,9 +186,12 @@ class MMVCv13:
         if convertSize < 8192:
             convertSize = 8192
         if convertSize % self.hps.data.hop_length != 0:  # モデルの出力のホップサイズで切り捨てが発生するので補う。
-            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
+            convertSize = convertSize + (
+                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
+            )
 
-        self.audio_buffer = self.audio_buffer[-1 * convertSize:]  # 変換対象の部分だけ抽出
+        convertOffset = -1 * convertSize
+        self.audio_buffer = self.audio_buffer[convertOffset:]  # extract only the segment to be converted
 
         audio = torch.FloatTensor(self.audio_buffer)
         audio_norm = audio.unsqueeze(0)  # unsqueeze
@@ -160,25 +204,29 @@ class MMVCv13:
 
         return data
 
     def _onnx_inference(self, data):
-        if hasattr(self, "onnx_session") == False or self.onnx_session == None:
+        if not hasattr(self, "onnx_session") or self.onnx_session is None:
             print("[Voice Changer] No ONNX session.")
             raise NoModeLoadedException("ONNX")
 
         x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
         sid_tgt1 = torch.LongTensor([self.settings.dstId])
         # if spec.size()[2] >= 8:
-        audio1 = self.onnx_session.run(
-            ["audio"],
-            {
-                "specs": spec.numpy(),
-                "lengths": spec_lengths.numpy(),
-                "sid_src": sid_src.numpy(),
-                "sid_tgt": sid_tgt1.numpy()
-            })[0][0, 0] * self.hps.data.max_wav_value
+        audio1 = (
+            self.onnx_session.run(
+                ["audio"],
+                {
+                    "specs": spec.numpy(),
+                    "lengths": spec_lengths.numpy(),
+                    "sid_src": sid_src.numpy(),
+                    "sid_tgt": sid_tgt1.numpy(),
+                },
+            )[0][0, 0]
+            * self.hps.data.max_wav_value
+        )
         return audio1
 
     def _pyTorch_inference(self, data):
-        if hasattr(self, "net_g") == False or self.net_g == None:
+        if not hasattr(self, "net_g") or self.net_g is None:
             print("[Voice Changer] No pyTorch session.")
             raise NoModeLoadedException("pytorch")
 
@@ -188,11 +236,19 @@ class MMVCv13:
             dev = torch.device("cuda", index=self.settings.gpu)
 
         with torch.no_grad():
-            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.to(dev) for x in data]
+            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
+                x.to(dev) for x in data
+            ]
             sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
 
-            audio1 = (self.net_g.to(dev).voice_conversion(spec, spec_lengths, sid_src=sid_src,
-                                                          sid_tgt=sid_target)[0, 0].data * self.hps.data.max_wav_value)
+            audio1 = (
+                self.net_g.to(dev)
+                .voice_conversion(
+                    spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target
+                )[0, 0]
+                .data
+                * self.hps.data.max_wav_value
+            )
 
             result = audio1.float().cpu().numpy()
         return result
@@ -208,7 +264,7 @@ class MMVCv13:
         del self.net_g
         del self.onnx_session
         remove_path = os.path.join("MMVC_Client_v13", "python")
-        sys.path = [x for x in sys.path if x.endswith(remove_path) == False]
+        sys.path = [x for x in sys.path if not x.endswith(remove_path)]
 
         for key in list(sys.modules):
             val = sys.modules.get(key)
 
             try:
                 file_path = val.__file__
                 if file_path.find(remove_path + os.path.sep) >= 0:
                     print("remove", key, file_path)
                     sys.modules.pop(key)
-            except Exception as e:
+            except Exception:
                 pass
diff --git a/server/voice_changer/MMVCv13/TrainerFunctions.py b/server/voice_changer/MMVCv13/TrainerFunctions.py
index 8d32bcf8..61eec4c5 100644
--- a/server/voice_changer/MMVCv13/TrainerFunctions.py
+++ b/server/voice_changer/MMVCv13/TrainerFunctions.py
@@ -1,36 +1,58 @@
 import torch
-import os, sys, json
+import os
+import sys
+import json
 import logging
+
 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
 logger = logging
 
 hann_window = {}
+
+
 def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
-    if torch.min(y) < -1.:
-        print('min value is ', torch.min(y))
-    if torch.max(y) > 1.:
-        print('max value is ', torch.max(y))
+    if torch.min(y) < -1.0:
+        print("min value is ", torch.min(y))
+    if torch.max(y) > 1.0:
+        print("max value is ", torch.max(y))
 
     global hann_window
-    dtype_device = str(y.dtype) + '_' + str(y.device)
-    wnsize_dtype_device = str(win_size) + '_' + dtype_device
+    dtype_device = str(y.dtype) + "_" + str(y.device)
+    wnsize_dtype_device = str(win_size) + "_" + dtype_device
     if wnsize_dtype_device not in hann_window:
-        hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
+        hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(
+            dtype=y.dtype, device=y.device
+        )
 
-    y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
+    y = torch.nn.functional.pad(
+        y.unsqueeze(1),
+        (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)),
mode="reflect", + ) y = y.squeeze(1) - spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device], - center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True) + spec = torch.stft( + y, + n_fft, + hop_length=hop_size, + win_length=win_size, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) spec = torch.view_as_real(spec) spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) return spec -class TextAudioSpeakerCollate(): - """ Zero-pads model inputs and targets - """ - def __init__(self, return_ids=False, no_text = False): + +class TextAudioSpeakerCollate: + """Zero-pads model inputs and targets""" + + def __init__(self, return_ids=False, no_text=False): self.return_ids = return_ids self.no_text = no_text @@ -42,8 +64,8 @@ class TextAudioSpeakerCollate(): """ # Right zero-pad all one-hot text sequences to max input length _, ids_sorted_decreasing = torch.sort( - torch.LongTensor([x[1].size(1) for x in batch]), - dim=0, descending=True) + torch.LongTensor([x[1].size(1) for x in batch]), dim=0, descending=True + ) max_text_len = max([len(x[0]) for x in batch]) max_spec_len = max([x[1].size(1) for x in batch]) @@ -64,88 +86,108 @@ class TextAudioSpeakerCollate(): row = batch[ids_sorted_decreasing[i]] text = row[0] - text_padded[i, :text.size(0)] = text + text_padded[i, : text.size(0)] = text text_lengths[i] = text.size(0) spec = row[1] - spec_padded[i, :, :spec.size(1)] = spec + spec_padded[i, :, : spec.size(1)] = spec spec_lengths[i] = spec.size(1) wav = row[2] - wav_padded[i, :, :wav.size(1)] = wav + wav_padded[i, :, : wav.size(1)] = wav wav_lengths[i] = wav.size(1) sid[i] = row[3] if self.return_ids: - return text_padded, text_lengths, spec_padded, spec_lengths, wav_padded, wav_lengths, sid, ids_sorted_decreasing - return text_padded, text_lengths, spec_padded, spec_lengths, wav_padded, wav_lengths, sid + return ( + text_padded, + text_lengths, + spec_padded, + spec_lengths, + wav_padded, + wav_lengths, + sid, + ids_sorted_decreasing, + ) + return ( + text_padded, + text_lengths, + spec_padded, + spec_lengths, + wav_padded, + wav_lengths, + sid, + ) def load_checkpoint(checkpoint_path, model, optimizer=None): - assert os.path.isfile(checkpoint_path), f"No such file or directory: {checkpoint_path}" - checkpoint_dict = torch.load(checkpoint_path, map_location='cpu') - iteration = checkpoint_dict['iteration'] - learning_rate = checkpoint_dict['learning_rate'] - if optimizer is not None: - optimizer.load_state_dict(checkpoint_dict['optimizer']) - saved_state_dict = checkpoint_dict['model'] - if hasattr(model, 'module'): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - new_state_dict= {} - for k, v in state_dict.items(): - try: - new_state_dict[k] = saved_state_dict[k] - except: - logger.info("%s is not in the checkpoint" % k) - new_state_dict[k] = v - if hasattr(model, 'module'): - model.module.load_state_dict(new_state_dict) - else: - model.load_state_dict(new_state_dict) - logger.info("Loaded checkpoint '{}' (iteration {})" .format( - checkpoint_path, iteration)) - return model, optimizer, learning_rate, iteration + assert os.path.isfile( + checkpoint_path + ), f"No such file or directory: {checkpoint_path}" + checkpoint_dict = torch.load(checkpoint_path, map_location="cpu") + iteration = checkpoint_dict["iteration"] + learning_rate = checkpoint_dict["learning_rate"] + if optimizer is not 
None: + optimizer.load_state_dict(checkpoint_dict["optimizer"]) + saved_state_dict = checkpoint_dict["model"] + if hasattr(model, "module"): + state_dict = model.module.state_dict() + else: + state_dict = model.state_dict() + new_state_dict = {} + for k, v in state_dict.items(): + try: + new_state_dict[k] = saved_state_dict[k] + except: + logger.info("%s is not in the checkpoint" % k) + new_state_dict[k] = v + if hasattr(model, "module"): + model.module.load_state_dict(new_state_dict) + else: + model.load_state_dict(new_state_dict) + logger.info( + "Loaded checkpoint '{}' (iteration {})".format(checkpoint_path, iteration) + ) + return model, optimizer, learning_rate, iteration def get_hparams_from_file(config_path): - with open(config_path, "r") as f: - data = f.read() - config = json.loads(data) + with open(config_path, "r") as f: + data = f.read() + config = json.loads(data) - hparams =HParams(**config) - return hparams + hparams = HParams(**config) + return hparams -class HParams(): - def __init__(self, **kwargs): - for k, v in kwargs.items(): - if type(v) == dict: - v = HParams(**v) - self[k] = v - - def keys(self): - return self.__dict__.keys() - def items(self): - return self.__dict__.items() +class HParams: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + if type(v) == dict: + v = HParams(**v) + self[k] = v - def values(self): - return self.__dict__.values() + def keys(self): + return self.__dict__.keys() - def __len__(self): - return len(self.__dict__) + def items(self): + return self.__dict__.items() - def __getitem__(self, key): - return getattr(self, key) + def values(self): + return self.__dict__.values() - def __setitem__(self, key, value): - return setattr(self, key, value) + def __len__(self): + return len(self.__dict__) - def __contains__(self, key): - return key in self.__dict__ + def __getitem__(self, key): + return getattr(self, key) - def __repr__(self): - return self.__dict__.__repr__() + def __setitem__(self, key, value): + return setattr(self, key, value) + def __contains__(self, key): + return key in self.__dict__ + + def __repr__(self): + return self.__dict__.__repr__() diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py index f38aa4e0..205615af 100755 --- a/server/voice_changer/VoiceChanger.py +++ b/server/voice_changer/VoiceChanger.py @@ -124,6 +124,7 @@ class VoiceChanger: try: return self.voiceChanger.loadModel(props) except Exception as e: + print(traceback.format_exc()) print("[Voice Changer] Model Load Error! Check your model is valid.", e) return {"status": "NG"}
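
Note (illustrative sketch, not part of the patch): the log_control change above
only wraps the existing module-level filter registrations in setup_loggers(),
so importing misc.log_control no longer has side effects; MMVCServerSIO.py now
triggers them explicitly at startup. The pattern, reduced to a self-contained
example (the filter class is renamed here and only two of the patched logger
names are shown):

    import logging

    class SuppressFilter(logging.Filter):
        def filter(self, record):
            # Returning False tells the logging machinery to drop the record.
            return False

    def setup_loggers():
        # A filter attached to a named logger applies to records emitted
        # directly on that logger, which covers libraries that log under
        # their own module names.
        for name in ["fairseq.tasks.hubert_pretraining", "numba.core.byteflow"]:
            logging.getLogger(name).addFilter(SuppressFilter())
        # multipart is silenced by cutting propagation to the root handlers;
        # asyncio by raising its level instead of filtering.
        logging.getLogger("multipart.multipart").propagate = False
        logging.getLogger("asyncio").setLevel(logging.WARNING)

    setup_loggers()  # call once at startup, as MMVCServerSIO.py now does
    logging.getLogger("numba.core.byteflow").warning("dropped by the filter")
    logging.getLogger("asyncio").warning("shown: passes the WARNING level")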