improve error handling 1

This commit is contained in:
wataru 2023-04-17 09:45:12 +09:00
parent 390a39fa64
commit e4ac5e74db
9 changed files with 90 additions and 57 deletions

README.md
View File

@ -67,6 +67,7 @@ Windows and Mac versions are provided.
- so-vits-svc 4.0/so-vits-svc 4.0v2 and RVC (Retrieval-based-Voice-Conversion) require a hubert model. Download `hubert_base.pt` from [this repository](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main) and place it in the folder that contains the batch file.
- DDSP-SVC requires the hubert-soft and enhancer models. Download hubert-soft from [this link](https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt) and place it in the folder that contains the batch file. For the enhancer, download `nsf_hifigan_20221211.zip` from [this site](https://github.com/openvpi/vocoders/releases/tag/nsf-hifigan-v1), extract it, and place the resulting `nsf_hifigan` folder in the folder that contains the batch file (a download sketch follows the table below).
- The DDSP-SVC encoder supports hubert-soft only.

| Version | OS | Framework | link | Supported VC | Size |
| --------- | --- | --------------------------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------ |
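
For reference, a minimal sketch of fetching the two `.pt` weights with the standard library. The `resolve/main` URL for `hubert_base.pt` is an assumption (the repository link above is a tree page, not a direct file link), and the enhancer zip is left as a manual step because it must be extracted by hand:

```python
# Sketch: download the prerequisite weights into the folder containing the
# batch file. The URL marked "assumed" is derived from the repository link.
import urllib.request
from pathlib import Path

WEIGHTS = {
    # assumed direct-download form of the Hugging Face repository link above
    "hubert_base.pt": "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt",
    # direct release asset, exactly as linked above
    "hubert-soft-0d54a1f4.pt": "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
}

def fetch_weights(dest: Path = Path(".")) -> None:
    for name, url in WEIGHTS.items():
        target = dest / name
        if not target.exists():  # skip anything already downloaded
            print(f"downloading {name} ...")
            urllib.request.urlretrieve(url, str(target))

if __name__ == "__main__":
    fetch_weights()
```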

server/Exceptions.py Normal file
View File

@ -0,0 +1,7 @@
+class NoModeLoadedException(Exception):
+    def __init__(self, framework):
+        self.framework = framework
+
+    def __str__(self):
+        return repr(f"No model for {self.framework} is loaded. Please confirm that a model has been uploaded.")

server/voice_changer/DDSP_SVC/DDSP_SVC.py
View File

@ -20,6 +20,9 @@ import pyworld as pw
import ddsp.vocoder as vo
from ddsp.core import upsample
from enhancer import Enhancer
+from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -224,7 +227,7 @@ class DDSP_SVC:
def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("ONNX")
seg_units = data[0]
# f0 = data[1]
@ -258,7 +261,7 @@ class DDSP_SVC:
if hasattr(self, "model") == False or self.model == None:
print("[Voice Changer] No pyTorch session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("pytorch")
c = data[0].to(self.useDevice())
f0 = data[1].to(self.useDevice())

server/voice_changer/MMVCv13/MMVCv13.py
View File

@ -22,6 +22,8 @@ from symbols import symbols
from models import SynthesizerTrn
from voice_changer.MMVCv13.TrainerFunctions import TextAudioSpeakerCollate, spectrogram_torch, load_checkpoint, get_hparams_from_file
+from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -119,6 +121,8 @@ class MMVCv13:
return data
def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate
def _get_spec(self, audio: any):
@ -158,7 +162,7 @@ class MMVCv13:
def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No ONNX session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("ONNX")
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
sid_tgt1 = torch.LongTensor([self.settings.dstId])
@ -176,7 +180,7 @@ class MMVCv13:
def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu")

server/voice_changer/MMVCv15/MMVCv15.py
View File

@ -20,6 +20,8 @@ import pyworld as pw
from models import SynthesizerTrn
from voice_changer.MMVCv15.client_modules import convert_continuos_f0, spectrogram_torch, get_hparams_from_file, load_checkpoint
+from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -138,6 +140,8 @@ class MMVCv15:
return data
def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate
def _get_f0(self, detector: str, newData: any):
@ -191,7 +195,7 @@ class MMVCv15:
def _onnx_inference(self, data):
if self.settings.onnxModelFile == "":
print("[Voice Changer] No ONNX session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("ONNX")
spec, f0, sid_src = data
spec = spec.unsqueeze(0)
@ -217,7 +221,7 @@ class MMVCv15:
def _pyTorch_inference(self, data):
if self.settings.pyTorchModelFile == "":
print("[Voice Changer] No pyTorch session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu")

server/voice_changer/RVC/RVC.py
View File

@ -2,7 +2,7 @@ import sys
import os
import resampy
from voice_changer.RVC.ModelWrapper import ModelWrapper
+from Exceptions import NoModeLoadedException
# avoiding parse arg error in RVC
sys.argv = ["MMVCServerSIO.py"]
@ -198,7 +198,7 @@ class RVC:
def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("ONNX")
if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu")
@ -239,7 +239,7 @@ class RVC:
def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu")

server/voice_changer/SoVitsSvc40/SoVitsSvc40.py
View File

@ -26,6 +26,10 @@ import cluster
import utils
from fairseq import checkpoint_utils
import librosa
+from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -185,6 +189,8 @@ class SoVitsSvc40:
return data
def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate
def get_unit_f0(self, audio_buffer, tran):
@ -278,7 +284,7 @@ class SoVitsSvc40:
def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("ONNX")
convertSize = data[3]
vol = data[4]
@ -309,7 +315,7 @@ class SoVitsSvc40:
def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu")

server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py
View File

@ -23,6 +23,9 @@ import cluster
import utils
from fairseq import checkpoint_utils
import librosa
+from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -161,6 +164,8 @@ class SoVitsSvc40v2:
return data
def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate
def get_unit_f0(self, audio_buffer, tran):
@ -240,7 +245,7 @@ class SoVitsSvc40v2:
def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("ONNX")
convertSize = data[3]
vol = data[4]
@ -272,7 +277,7 @@ class SoVitsSvc40v2:
def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.")
-return np.zeros(1).astype(np.int16)
+raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu")

server/voice_changer/VoiceChanger.py
View File

@ -14,7 +14,7 @@ from voice_changer.IORecorder import IORecorder
from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
import time
+from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -211,27 +211,27 @@ class VoiceChanger():
        return self.on_request_sola(receivedData)

    def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
-        processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
+        try:
+            processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()

-        # pre-processing
-        with Timer("pre-process") as t:
-            if self.settings.inputSampleRate != processing_sampling_rate:
-                newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
-            else:
-                newData = receivedData
+            # pre-processing
+            with Timer("pre-process") as t:
+                if self.settings.inputSampleRate != processing_sampling_rate:
+                    newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
+                else:
+                    newData = receivedData

-            sola_search_frame = int(0.012 * processing_sampling_rate)
-            # sola_search_frame = 0
-            block_frame = newData.shape[0]
-            crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
-            self._generate_strength(crossfade_frame)
+                sola_search_frame = int(0.012 * processing_sampling_rate)
+                # sola_search_frame = 0
+                block_frame = newData.shape[0]
+                crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
+                self._generate_strength(crossfade_frame)

-            data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
-        preprocess_time = t.secs
+                data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
+            preprocess_time = t.secs

-        # conversion
-        with Timer("main-process") as t:
-            try:
+            # conversion
+            with Timer("main-process") as t:
                # Inference
                audio = self.voiceChanger.inference(data)
@ -258,38 +258,41 @@ class VoiceChanger():
                else:
                    self.sola_buffer = audio[- crossfade_frame:] * self.np_prev_strength
                    # self.sola_buffer = audio[- crossfade_frame:]
-                mainprocess_time = t.secs
-            except Exception as e:
-                print("VC PROCESSING!!!! EXCEPTION!!!", e)
-                print(traceback.format_exc())
-                return np.zeros(1).astype(np.int16), [0, 0, 0]
+            mainprocess_time = t.secs

-        # post-processing
-        with Timer("post-process") as t:
-            result = result.astype(np.int16)
-            if self.settings.inputSampleRate != processing_sampling_rate:
-                outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
-            else:
-                outputData = result
+            # post-processing
+            with Timer("post-process") as t:
+                result = result.astype(np.int16)
+                if self.settings.inputSampleRate != processing_sampling_rate:
+                    outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
+                else:
+                    outputData = result

-            print_convert_processing(
-                f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
+                print_convert_processing(
+                    f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")

-            if self.settings.recordIO == 1:
-                self.ioRecorder.writeInput(receivedData)
-                self.ioRecorder.writeOutput(outputData.tobytes())
+                if self.settings.recordIO == 1:
+                    self.ioRecorder.writeInput(receivedData)
+                    self.ioRecorder.writeOutput(outputData.tobytes())

-            # if receivedData.shape[0] != outputData.shape[0]:
-            #     print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
-            #     outputData = pad_array(outputData, receivedData.shape[0])
-            #     # print_convert_processing(
-            #     #     f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
-        postprocess_time = t.secs
+                # if receivedData.shape[0] != outputData.shape[0]:
+                #     print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
+                #     outputData = pad_array(outputData, receivedData.shape[0])
+                #     # print_convert_processing(
+                #     #     f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
+            postprocess_time = t.secs

-        print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
-        perf = [preprocess_time, mainprocess_time, postprocess_time]
-        return outputData, perf
+            print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
+            perf = [preprocess_time, mainprocess_time, postprocess_time]
+            return outputData, perf
+        except NoModeLoadedException as e:
+            print("[Voice Changer] [Exception]", e)
+            return np.zeros(1).astype(np.int16), [0, 0, 0]
+        except Exception as e:
+            print("VC PROCESSING!!!! EXCEPTION!!!", e)
+            print(traceback.format_exc())
+            return np.zeros(1).astype(np.int16), [0, 0, 0]
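
One detail this restructuring relies on: `voice_changer.utils.Timer` must store the elapsed seconds in `t.secs` when the `with` block exits, and its `__exit__` must not swallow exceptions, otherwise a `NoModeLoadedException` raised inside a timed block would never reach the new outer handler. A minimal stand-in with that interface (a sketch under those assumptions, not the repository's implementation):

```python
import time


class Timer:
    # Minimal stand-in for voice_changer.utils.Timer, assuming the interface
    # used above: `with Timer("label") as t: ...` then read `t.secs` after.
    def __init__(self, title: str):
        self.title = title
        self.secs = 0.0

    def __enter__(self):
        self._start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.secs = time.perf_counter() - self._start
        # Returning False lets exceptions such as NoModeLoadedException
        # propagate to the try/except in on_request_sola.
        return False
```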
def export2onnx(self):
return self.voiceChanger.export2onnx()