improve error handling 1

This commit is contained in:
wataru 2023-04-17 09:45:12 +09:00
parent 390a39fa64
commit e4ac5e74db
9 changed files with 90 additions and 57 deletions

View File

@ -67,6 +67,7 @@ Windows 版と Mac 版を提供しています。
- so-vits-svc 4.0/so-vits-svc 4.0v2、RVC(Retrieval-based-Voice-Conversion)の動作には hubert のモデルが必要になります。[このリポジトリ](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main)から`hubert_base.pt`をダウンロードして、バッチファイルがあるフォルダに格納してください。 - so-vits-svc 4.0/so-vits-svc 4.0v2、RVC(Retrieval-based-Voice-Conversion)の動作には hubert のモデルが必要になります。[このリポジトリ](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main)から`hubert_base.pt`をダウンロードして、バッチファイルがあるフォルダに格納してください。
- DDSP-SVC の動作には、hubert-soft と enhancer のモデルが必要です。hubert-soft は[このリンク](https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt)からダウンロードして、バッチファイルがあるフォルダに格納してください。enhancer は[このサイト](https://github.com/openvpi/vocoders/releases/tag/nsf-hifigan-v1)から`nsf_hifigan_20221211.zip`をダウンロードして下さい。解凍すると出てくる`nsf_hifigan`というフォルダをバッチファイルがあるフォルダに格納してください。 - DDSP-SVC の動作には、hubert-soft と enhancer のモデルが必要です。hubert-soft は[このリンク](https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt)からダウンロードして、バッチファイルがあるフォルダに格納してください。enhancer は[このサイト](https://github.com/openvpi/vocoders/releases/tag/nsf-hifigan-v1)から`nsf_hifigan_20221211.zip`をダウンロードして下さい。解凍すると出てくる`nsf_hifigan`というフォルダをバッチファイルがあるフォルダに格納してください。
- DDSP-SVC の encoder は hubert-soft のみ対応です。
| Version | OS | フレームワーク | link | サポート VC | サイズ | | Version | OS | フレームワーク | link | サポート VC | サイズ |
| --------- | --- | --------------------------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------ | | --------- | --- | --------------------------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------ |

7
server/Exceptions.py Normal file
View File

@ -0,0 +1,7 @@
class NoModeLoadedException(Exception):
    """Raised when an operation needs a model (or its config) that is not loaded.

    Attributes:
        framework: Label of the missing component, exactly as supplied by the
            raiser; it is interpolated verbatim into the message.
    """

    def __init__(self, framework):
        # Chain to Exception so the standard exception machinery (args,
        # repr, copying) sees the same value we keep on the instance.
        super().__init__(framework)
        self.framework = framework

    def __str__(self):
        # Return the message itself. Wrapping it in repr() would surround the
        # text with quote characters in every log line that prints it.
        return f"No model for {self.framework} loaded. Please confirm the model uploaded."

View File

@ -20,6 +20,9 @@ import pyworld as pw
import ddsp.vocoder as vo import ddsp.vocoder as vo
from ddsp.core import upsample from ddsp.core import upsample
from enhancer import Enhancer from enhancer import Enhancer
from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -224,7 +227,7 @@ class DDSP_SVC:
def _onnx_inference(self, data): def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None: if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.") print("[Voice Changer] No onnx session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("ONNX")
seg_units = data[0] seg_units = data[0]
# f0 = data[1] # f0 = data[1]
@ -258,7 +261,7 @@ class DDSP_SVC:
if hasattr(self, "model") == False or self.model == None: if hasattr(self, "model") == False or self.model == None:
print("[Voice Changer] No pyTorch session.") print("[Voice Changer] No pyTorch session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("pytorch")
c = data[0].to(self.useDevice()) c = data[0].to(self.useDevice())
f0 = data[1].to(self.useDevice()) f0 = data[1].to(self.useDevice())

View File

@ -22,6 +22,8 @@ from symbols import symbols
from models import SynthesizerTrn from models import SynthesizerTrn
from voice_changer.MMVCv13.TrainerFunctions import TextAudioSpeakerCollate, spectrogram_torch, load_checkpoint, get_hparams_from_file from voice_changer.MMVCv13.TrainerFunctions import TextAudioSpeakerCollate, spectrogram_torch, load_checkpoint, get_hparams_from_file
from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -119,6 +121,8 @@ class MMVCv13:
return data return data
def get_processing_sampling_rate(self): def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate return self.hps.data.sampling_rate
def _get_spec(self, audio: any): def _get_spec(self, audio: any):
@ -158,7 +162,7 @@ class MMVCv13:
def _onnx_inference(self, data): def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None: if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No ONNX session.") print("[Voice Changer] No ONNX session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("ONNX")
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data] x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
sid_tgt1 = torch.LongTensor([self.settings.dstId]) sid_tgt1 = torch.LongTensor([self.settings.dstId])
@ -176,7 +180,7 @@ class MMVCv13:
def _pyTorch_inference(self, data): def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None: if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.") print("[Voice Changer] No pyTorch session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0: if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu") dev = torch.device("cpu")

View File

@ -20,6 +20,8 @@ import pyworld as pw
from models import SynthesizerTrn from models import SynthesizerTrn
from voice_changer.MMVCv15.client_modules import convert_continuos_f0, spectrogram_torch, get_hparams_from_file, load_checkpoint from voice_changer.MMVCv15.client_modules import convert_continuos_f0, spectrogram_torch, get_hparams_from_file, load_checkpoint
from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -138,6 +140,8 @@ class MMVCv15:
return data return data
def get_processing_sampling_rate(self): def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate return self.hps.data.sampling_rate
def _get_f0(self, detector: str, newData: any): def _get_f0(self, detector: str, newData: any):
@ -191,7 +195,7 @@ class MMVCv15:
def _onnx_inference(self, data): def _onnx_inference(self, data):
if self.settings.onnxModelFile == "": if self.settings.onnxModelFile == "":
print("[Voice Changer] No ONNX session.") print("[Voice Changer] No ONNX session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("ONNX")
spec, f0, sid_src = data spec, f0, sid_src = data
spec = spec.unsqueeze(0) spec = spec.unsqueeze(0)
@ -217,7 +221,7 @@ class MMVCv15:
def _pyTorch_inference(self, data): def _pyTorch_inference(self, data):
if self.settings.pyTorchModelFile == "": if self.settings.pyTorchModelFile == "":
print("[Voice Changer] No pyTorch session.") print("[Voice Changer] No pyTorch session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0: if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu") dev = torch.device("cpu")

View File

@ -2,7 +2,7 @@ import sys
import os import os
import resampy import resampy
from voice_changer.RVC.ModelWrapper import ModelWrapper from voice_changer.RVC.ModelWrapper import ModelWrapper
from Exceptions import NoModeLoadedException
# avoiding parse arg error in RVC # avoiding parse arg error in RVC
sys.argv = ["MMVCServerSIO.py"] sys.argv = ["MMVCServerSIO.py"]
@ -198,7 +198,7 @@ class RVC:
def _onnx_inference(self, data): def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None: if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.") print("[Voice Changer] No onnx session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("ONNX")
if self.settings.gpu < 0 or self.gpu_num == 0: if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu") dev = torch.device("cpu")
@ -239,7 +239,7 @@ class RVC:
def _pyTorch_inference(self, data): def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None: if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.") print("[Voice Changer] No pyTorch session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0: if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu") dev = torch.device("cpu")

View File

@ -26,6 +26,10 @@ import cluster
import utils import utils
from fairseq import checkpoint_utils from fairseq import checkpoint_utils
import librosa import librosa
from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -185,6 +189,8 @@ class SoVitsSvc40:
return data return data
def get_processing_sampling_rate(self): def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate return self.hps.data.sampling_rate
def get_unit_f0(self, audio_buffer, tran): def get_unit_f0(self, audio_buffer, tran):
@ -278,7 +284,7 @@ class SoVitsSvc40:
def _onnx_inference(self, data): def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None: if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.") print("[Voice Changer] No onnx session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("ONNX")
convertSize = data[3] convertSize = data[3]
vol = data[4] vol = data[4]
@ -309,7 +315,7 @@ class SoVitsSvc40:
def _pyTorch_inference(self, data): def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None: if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.") print("[Voice Changer] No pyTorch session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0: if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu") dev = torch.device("cpu")

View File

@ -23,6 +23,9 @@ import cluster
import utils import utils
from fairseq import checkpoint_utils from fairseq import checkpoint_utils
import librosa import librosa
from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -161,6 +164,8 @@ class SoVitsSvc40v2:
return data return data
def get_processing_sampling_rate(self): def get_processing_sampling_rate(self):
if hasattr(self, "hps") == False:
raise NoModeLoadedException("config")
return self.hps.data.sampling_rate return self.hps.data.sampling_rate
def get_unit_f0(self, audio_buffer, tran): def get_unit_f0(self, audio_buffer, tran):
@ -240,7 +245,7 @@ class SoVitsSvc40v2:
def _onnx_inference(self, data): def _onnx_inference(self, data):
if hasattr(self, "onnx_session") == False or self.onnx_session == None: if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.") print("[Voice Changer] No onnx session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("ONNX")
convertSize = data[3] convertSize = data[3]
vol = data[4] vol = data[4]
@ -272,7 +277,7 @@ class SoVitsSvc40v2:
def _pyTorch_inference(self, data): def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None: if hasattr(self, "net_g") == False or self.net_g == None:
print("[Voice Changer] No pyTorch session.") print("[Voice Changer] No pyTorch session.")
return np.zeros(1).astype(np.int16) raise NoModeLoadedException("pytorch")
if self.settings.gpu < 0 or self.gpu_num == 0: if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu") dev = torch.device("cpu")

View File

@ -14,7 +14,7 @@ from voice_changer.IORecorder import IORecorder
from voice_changer.utils.Timer import Timer from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
import time import time
from Exceptions import NoModeLoadedException
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -211,27 +211,27 @@ class VoiceChanger():
return self.on_request_sola(receivedData) return self.on_request_sola(receivedData)
def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]: def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate() try:
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
# 前処理 # 前処理
with Timer("pre-process") as t: with Timer("pre-process") as t:
if self.settings.inputSampleRate != processing_sampling_rate: if self.settings.inputSampleRate != processing_sampling_rate:
newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate)) newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
else: else:
newData = receivedData newData = receivedData
sola_search_frame = int(0.012 * processing_sampling_rate) sola_search_frame = int(0.012 * processing_sampling_rate)
# sola_search_frame = 0 # sola_search_frame = 0
block_frame = newData.shape[0] block_frame = newData.shape[0]
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame) crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
self._generate_strength(crossfade_frame) self._generate_strength(crossfade_frame)
data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame) data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
preprocess_time = t.secs preprocess_time = t.secs
# 変換処理 # 変換処理
with Timer("main-process") as t: with Timer("main-process") as t:
try:
# Inference # Inference
audio = self.voiceChanger.inference(data) audio = self.voiceChanger.inference(data)
@ -258,38 +258,41 @@ class VoiceChanger():
else: else:
self.sola_buffer = audio[- crossfade_frame:] * self.np_prev_strength self.sola_buffer = audio[- crossfade_frame:] * self.np_prev_strength
# self.sola_buffer = audio[- crossfade_frame:] # self.sola_buffer = audio[- crossfade_frame:]
mainprocess_time = t.secs
except Exception as e: # 後処理
print("VC PROCESSING!!!! EXCEPTION!!!", e) with Timer("post-process") as t:
print(traceback.format_exc()) result = result.astype(np.int16)
return np.zeros(1).astype(np.int16), [0, 0, 0] if self.settings.inputSampleRate != processing_sampling_rate:
mainprocess_time = t.secs outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
else:
outputData = result
# 後処理 print_convert_processing(
with Timer("post-process") as t: f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
result = result.astype(np.int16)
if self.settings.inputSampleRate != processing_sampling_rate:
outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
else:
outputData = result
print_convert_processing( if self.settings.recordIO == 1:
f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz") self.ioRecorder.writeInput(receivedData)
self.ioRecorder.writeOutput(outputData.tobytes())
if self.settings.recordIO == 1: # if receivedData.shape[0] != outputData.shape[0]:
self.ioRecorder.writeInput(receivedData) # print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
self.ioRecorder.writeOutput(outputData.tobytes()) # outputData = pad_array(outputData, receivedData.shape[0])
# # print_convert_processing(
# # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
postprocess_time = t.secs
# if receivedData.shape[0] != outputData.shape[0]: print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
# print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}") perf = [preprocess_time, mainprocess_time, postprocess_time]
# outputData = pad_array(outputData, receivedData.shape[0]) return outputData, perf
# # print_convert_processing(
# # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
postprocess_time = t.secs
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}") except NoModeLoadedException as e:
perf = [preprocess_time, mainprocess_time, postprocess_time] print("[Voice Changer] [Exception]", e)
return outputData, perf return np.zeros(1).astype(np.int16), [0, 0, 0]
except Exception as e:
print("VC PROCESSING!!!! EXCEPTION!!!", e)
print(traceback.format_exc())
return np.zeros(1).astype(np.int16), [0, 0, 0]
def export2onnx(self): def export2onnx(self):
return self.voiceChanger.export2onnx() return self.voiceChanger.export2onnx()