diff --git a/.gitignore b/.gitignore
index 10520938..3be54042 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,8 +13,15 @@
 server/out.wav
 server/G_*.pth
 server/train_config.json
+# Model folder for v.1.3.x tests
 server/v13
+server/hubert
+server/so-vits-svc
+# Model folder for sovits tests
+server/sovits
+server/test
+
 server/memo.md

 client/lib/dist
diff --git a/server/MMVCServerSIO.py b/server/MMVCServerSIO.py
index 15aadf7e..e389cf51 100755
--- a/server/MMVCServerSIO.py
+++ b/server/MMVCServerSIO.py
@@ -1,5 +1,4 @@
 import sys
-sys.path.append("MMVC_Client/python")

 from distutils.util import strtobool
 from datetime import datetime
diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py
index 456f2f49..ca2492a6 100755
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@@ -1,3 +1,6 @@
+import sys
+sys.path.append("MMVC_Client/python")
+
 from const import ERROR_NO_ONNX_SESSION, TMP_DIR
 import torch
 import os
@@ -14,6 +17,8 @@
 from models import SynthesizerTrn
 import pyworld as pw
 from voice_changer.client_modules import convert_continuos_f0, spectrogram_torch, TextAudioSpeakerCollate, get_hparams_from_file, load_checkpoint
+from voice_changer.MMVCv15 import MMVCv15
+
 import time

 providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@@ -121,6 +126,8 @@
         self.currentCrossFadeEndRate = 0
         self.currentCrossFadeOverlapSize = 0

+        self.voiceChanger = MMVCv15()
+
         self.gpu_num = torch.cuda.device_count()
         self.text_norm = torch.LongTensor([0, 6, 0])
         self.audio_buffer = torch.zeros(1, 0)
@@ -285,14 +292,14 @@

         return self.get_info()

-    def _generate_strength(self, unpackedData):
+    def _generate_strength(self, dataLength):

-        if self.unpackedData_length != unpackedData.shape[0] or \
+        if self.unpackedData_length != dataLength or \
                 self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
                 self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
                 self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
-            self.unpackedData_length = unpackedData.shape[0]
+            self.unpackedData_length = dataLength
             self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
             self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
             self.currentCrossFadeOverlapSize = self.settings.crossFadeOverlapSize
@@ -502,7 +509,7 @@
                 convertSize = 8192
             if convertSize % 128 != 0:  # Compensate because truncation occurs at the model's output hop size.
                 convertSize = convertSize + (128 - (convertSize % 128))
-            self._generate_strength(unpackedData)
+            self._generate_strength(unpackedData.shape[0])
             data = self._generate_input(unpackedData, convertSize)
         preprocess_time = t.secs
@@ -510,8 +517,10 @@
         try:
             if self.settings.framework == "ONNX":
                 result = self._onnx_inference(data, unpackedData.shape[0])
+                # result = self.voiceChanger._onnx_inference(data, unpackedData.shape[0])
             else:
                 result = self._pyTorch_inference(data, unpackedData.shape[0])
+                # result = self.voiceChanger._pyTorch_inference(data, unpackedData.shape[0])
         except Exception as e:
             print("VC PROCESSING!!!! EXCEPTION!!!", e)
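Note: the `convertSize` hunk pads the conversion window up to a multiple of 128 samples, the model's output hop size, so no output frames are dropped at the hop boundary. A minimal standalone sketch of the same arithmetic (the function name and test values are illustrative, not from the patch):

```python
def align_to_hop(convert_size: int, hop_size: int = 128) -> int:
    """Round convert_size up to the next multiple of hop_size so the
    model output is not truncated at the hop boundary."""
    remainder = convert_size % hop_size
    if remainder != 0:
        convert_size += hop_size - remainder
    return convert_size


assert align_to_hop(8192) == 8192  # already a multiple of 128
assert align_to_hop(8200) == 8320  # padded up to the next multiple of 128
```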
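Note: `_generate_strength` now takes only the buffer length (`unpackedData.shape[0]`) instead of the whole array, since the length is all its invalidation check uses. A minimal sketch of that caching pattern, assuming a simple linear fade curve; the class and attribute names are illustrative, not from the patch:

```python
import numpy as np


class CrossFadeState:
    """Rebuilds the cross-fade strength curve only when the input length
    or a cross-fade setting changes, mirroring the invalidation check in
    _generate_strength (a hypothetical, simplified reduction)."""

    def __init__(self) -> None:
        self.cached_length = -1
        self.cached_overlap = -1
        self.fade_in = np.zeros(0)

    def generate_strength(self, data_length: int, overlap: int) -> np.ndarray:
        # Passing the length rather than the ndarray keeps this check
        # cheap and the method signature minimal.
        if self.cached_length != data_length or self.cached_overlap != overlap:
            self.cached_length = data_length
            self.cached_overlap = overlap
            self.fade_in = np.linspace(0.0, 1.0, overlap)  # linear ramp
        return self.fade_in
```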