mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 21:45:00 +03:00
improve error handling 1
This commit is contained in:
parent
390a39fa64
commit
e4ac5e74db
@ -67,6 +67,7 @@ Windows 版と Mac 版を提供しています。
|
||||
- so-vits-svc 4.0/so-vits-svc 4.0v2、RVC(Retrieval-based-Voice-Conversion)の動作には hubert のモデルが必要になります。[このリポジトリ](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main)から`hubert_base.pt`をダウンロードして、バッチファイルがあるフォルダに格納してください。
|
||||
|
||||
- DDSP-SVC の動作には、hubert-soft と enhancer のモデルが必要です。hubert-soft は[このリンク](https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt)からダウンロードして、バッチファイルがあるフォルダに格納してください。enhancer は[このサイト](https://github.com/openvpi/vocoders/releases/tag/nsf-hifigan-v1)から`nsf_hifigan_20221211.zip`をダウンロードしてください。解凍すると出てくる`nsf_hifigan`というフォルダをバッチファイルがあるフォルダに格納してください。
|
||||
- DDSP-SVC の encoder は hubert-soft のみ対応です。
|
||||
|
||||
| Version | OS | フレームワーク | link | サポート VC | サイズ |
|
||||
| --------- | --- | --------------------------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------ |
|
||||
|
7
server/Exceptions.py
Normal file
7
server/Exceptions.py
Normal file
@ -0,0 +1,7 @@
|
||||
|
||||
class NoModeLoadedException(Exception):
    """Error signalling that no model is loaded for a given framework.

    Attributes:
        framework: Label interpolated into the error message to say which
            kind of model is missing.
    """

    def __init__(self, framework):
        # Keep the label on the instance so __str__ can build the message.
        self.framework = framework

    def __str__(self):
        """Return the quoted (repr-style) human-readable message."""
        message = f"No model for {self.framework} loaded. Please confirm the model uploaded."
        return f"{message!r}"
|
@ -20,6 +20,9 @@ import pyworld as pw
|
||||
import ddsp.vocoder as vo
|
||||
from ddsp.core import upsample
|
||||
from enhancer import Enhancer
|
||||
|
||||
from Exceptions import NoModeLoadedException
|
||||
|
||||
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||
|
||||
|
||||
@ -224,7 +227,7 @@ class DDSP_SVC:
|
||||
def _onnx_inference(self, data):
|
||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||
print("[Voice Changer] No onnx session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("ONNX")
|
||||
|
||||
seg_units = data[0]
|
||||
# f0 = data[1]
|
||||
@ -258,7 +261,7 @@ class DDSP_SVC:
|
||||
|
||||
if hasattr(self, "model") == False or self.model == None:
|
||||
print("[Voice Changer] No pyTorch session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("pytorch")
|
||||
|
||||
c = data[0].to(self.useDevice())
|
||||
f0 = data[1].to(self.useDevice())
|
||||
|
@ -22,6 +22,8 @@ from symbols import symbols
|
||||
from models import SynthesizerTrn
|
||||
from voice_changer.MMVCv13.TrainerFunctions import TextAudioSpeakerCollate, spectrogram_torch, load_checkpoint, get_hparams_from_file
|
||||
|
||||
from Exceptions import NoModeLoadedException
|
||||
|
||||
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||
|
||||
|
||||
@ -119,6 +121,8 @@ class MMVCv13:
|
||||
return data
|
||||
|
||||
def get_processing_sampling_rate(self):
    """Return the sampling rate recorded in the loaded hyperparameters.

    Returns:
        The value of ``self.hps.data.sampling_rate``.

    Raises:
        NoModeLoadedException: If ``self.hps`` has not been set, or is
            ``None``, i.e. no config has been loaded yet.
    """
    # Treat "attribute missing" and "attribute is None" the same way, as the
    # inference guards in this class do for their session/model attributes.
    hps = getattr(self, "hps", None)
    if hps is None:
        raise NoModeLoadedException("config")
    return hps.data.sampling_rate
|
||||
|
||||
def _get_spec(self, audio: any):
|
||||
@ -158,7 +162,7 @@ class MMVCv13:
|
||||
def _onnx_inference(self, data):
|
||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||
print("[Voice Changer] No ONNX session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("ONNX")
|
||||
|
||||
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
|
||||
sid_tgt1 = torch.LongTensor([self.settings.dstId])
|
||||
@ -176,7 +180,7 @@ class MMVCv13:
|
||||
def _pyTorch_inference(self, data):
|
||||
if hasattr(self, "net_g") == False or self.net_g == None:
|
||||
print("[Voice Changer] No pyTorch session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("pytorch")
|
||||
|
||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
||||
dev = torch.device("cpu")
|
||||
|
@ -20,6 +20,8 @@ import pyworld as pw
|
||||
from models import SynthesizerTrn
|
||||
from voice_changer.MMVCv15.client_modules import convert_continuos_f0, spectrogram_torch, get_hparams_from_file, load_checkpoint
|
||||
|
||||
from Exceptions import NoModeLoadedException
|
||||
|
||||
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||
|
||||
|
||||
@ -138,6 +140,8 @@ class MMVCv15:
|
||||
return data
|
||||
|
||||
def get_processing_sampling_rate(self):
    """Return the sampling rate recorded in the loaded hyperparameters.

    Returns:
        The value of ``self.hps.data.sampling_rate``.

    Raises:
        NoModeLoadedException: If ``self.hps`` has not been set, or is
            ``None``, i.e. no config has been loaded yet.
    """
    # A missing attribute and an explicit None both mean "nothing loaded".
    hps = getattr(self, "hps", None)
    if hps is None:
        raise NoModeLoadedException("config")
    return hps.data.sampling_rate
|
||||
|
||||
def _get_f0(self, detector: str, newData: any):
|
||||
@ -191,7 +195,7 @@ class MMVCv15:
|
||||
def _onnx_inference(self, data):
|
||||
if self.settings.onnxModelFile == "":
|
||||
print("[Voice Changer] No ONNX session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("ONNX")
|
||||
|
||||
spec, f0, sid_src = data
|
||||
spec = spec.unsqueeze(0)
|
||||
@ -217,7 +221,7 @@ class MMVCv15:
|
||||
def _pyTorch_inference(self, data):
|
||||
if self.settings.pyTorchModelFile == "":
|
||||
print("[Voice Changer] No pyTorch session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("pytorch")
|
||||
|
||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
||||
dev = torch.device("cpu")
|
||||
|
@ -2,7 +2,7 @@ import sys
|
||||
import os
|
||||
import resampy
|
||||
from voice_changer.RVC.ModelWrapper import ModelWrapper
|
||||
|
||||
from Exceptions import NoModeLoadedException
|
||||
|
||||
# avoiding parse arg error in RVC
|
||||
sys.argv = ["MMVCServerSIO.py"]
|
||||
@ -198,7 +198,7 @@ class RVC:
|
||||
def _onnx_inference(self, data):
|
||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||
print("[Voice Changer] No onnx session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("ONNX")
|
||||
|
||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
||||
dev = torch.device("cpu")
|
||||
@ -239,7 +239,7 @@ class RVC:
|
||||
def _pyTorch_inference(self, data):
|
||||
if hasattr(self, "net_g") == False or self.net_g == None:
|
||||
print("[Voice Changer] No pyTorch session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("pytorch")
|
||||
|
||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
||||
dev = torch.device("cpu")
|
||||
|
@ -26,6 +26,10 @@ import cluster
|
||||
import utils
|
||||
from fairseq import checkpoint_utils
|
||||
import librosa
|
||||
|
||||
from Exceptions import NoModeLoadedException
|
||||
|
||||
|
||||
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||
|
||||
|
||||
@ -185,6 +189,8 @@ class SoVitsSvc40:
|
||||
return data
|
||||
|
||||
def get_processing_sampling_rate(self):
    """Return the sampling rate recorded in the loaded hyperparameters.

    Returns:
        The value of ``self.hps.data.sampling_rate``.

    Raises:
        NoModeLoadedException: If ``self.hps`` has not been set, or is
            ``None``, i.e. no config has been loaded yet.
    """
    # Guard against both an unset attribute and one that was reset to None.
    hps = getattr(self, "hps", None)
    if hps is None:
        raise NoModeLoadedException("config")
    return hps.data.sampling_rate
|
||||
|
||||
def get_unit_f0(self, audio_buffer, tran):
|
||||
@ -278,7 +284,7 @@ class SoVitsSvc40:
|
||||
def _onnx_inference(self, data):
|
||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||
print("[Voice Changer] No onnx session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("ONNX")
|
||||
|
||||
convertSize = data[3]
|
||||
vol = data[4]
|
||||
@ -309,7 +315,7 @@ class SoVitsSvc40:
|
||||
def _pyTorch_inference(self, data):
|
||||
if hasattr(self, "net_g") == False or self.net_g == None:
|
||||
print("[Voice Changer] No pyTorch session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("pytorch")
|
||||
|
||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
||||
dev = torch.device("cpu")
|
||||
|
@ -23,6 +23,9 @@ import cluster
|
||||
import utils
|
||||
from fairseq import checkpoint_utils
|
||||
import librosa
|
||||
|
||||
from Exceptions import NoModeLoadedException
|
||||
|
||||
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||
|
||||
|
||||
@ -161,6 +164,8 @@ class SoVitsSvc40v2:
|
||||
return data
|
||||
|
||||
def get_processing_sampling_rate(self):
    """Return the sampling rate recorded in the loaded hyperparameters.

    Returns:
        The value of ``self.hps.data.sampling_rate``.

    Raises:
        NoModeLoadedException: If ``self.hps`` has not been set, or is
            ``None``, i.e. no config has been loaded yet.
    """
    # Unset and None are handled identically: no config is available.
    hps = getattr(self, "hps", None)
    if hps is None:
        raise NoModeLoadedException("config")
    return hps.data.sampling_rate
|
||||
|
||||
def get_unit_f0(self, audio_buffer, tran):
|
||||
@ -240,7 +245,7 @@ class SoVitsSvc40v2:
|
||||
def _onnx_inference(self, data):
|
||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||
print("[Voice Changer] No onnx session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("ONNX")
|
||||
|
||||
convertSize = data[3]
|
||||
vol = data[4]
|
||||
@ -272,7 +277,7 @@ class SoVitsSvc40v2:
|
||||
def _pyTorch_inference(self, data):
|
||||
if hasattr(self, "net_g") == False or self.net_g == None:
|
||||
print("[Voice Changer] No pyTorch session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
raise NoModeLoadedException("pytorch")
|
||||
|
||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
||||
dev = torch.device("cpu")
|
||||
|
@ -14,7 +14,7 @@ from voice_changer.IORecorder import IORecorder
|
||||
from voice_changer.utils.Timer import Timer
|
||||
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
|
||||
import time
|
||||
|
||||
from Exceptions import NoModeLoadedException
|
||||
|
||||
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||
|
||||
@ -211,27 +211,27 @@ class VoiceChanger():
|
||||
return self.on_request_sola(receivedData)
|
||||
|
||||
def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
|
||||
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
|
||||
try:
|
||||
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
|
||||
|
||||
# 前処理
|
||||
with Timer("pre-process") as t:
|
||||
if self.settings.inputSampleRate != processing_sampling_rate:
|
||||
newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
|
||||
else:
|
||||
newData = receivedData
|
||||
# 前処理
|
||||
with Timer("pre-process") as t:
|
||||
if self.settings.inputSampleRate != processing_sampling_rate:
|
||||
newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
|
||||
else:
|
||||
newData = receivedData
|
||||
|
||||
sola_search_frame = int(0.012 * processing_sampling_rate)
|
||||
# sola_search_frame = 0
|
||||
block_frame = newData.shape[0]
|
||||
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
|
||||
self._generate_strength(crossfade_frame)
|
||||
sola_search_frame = int(0.012 * processing_sampling_rate)
|
||||
# sola_search_frame = 0
|
||||
block_frame = newData.shape[0]
|
||||
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
|
||||
self._generate_strength(crossfade_frame)
|
||||
|
||||
data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
|
||||
preprocess_time = t.secs
|
||||
data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
|
||||
preprocess_time = t.secs
|
||||
|
||||
# 変換処理
|
||||
with Timer("main-process") as t:
|
||||
try:
|
||||
# 変換処理
|
||||
with Timer("main-process") as t:
|
||||
# Inference
|
||||
audio = self.voiceChanger.inference(data)
|
||||
|
||||
@ -258,38 +258,41 @@ class VoiceChanger():
|
||||
else:
|
||||
self.sola_buffer = audio[- crossfade_frame:] * self.np_prev_strength
|
||||
# self.sola_buffer = audio[- crossfade_frame:]
|
||||
mainprocess_time = t.secs
|
||||
|
||||
except Exception as e:
|
||||
print("VC PROCESSING!!!! EXCEPTION!!!", e)
|
||||
print(traceback.format_exc())
|
||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||
mainprocess_time = t.secs
|
||||
# 後処理
|
||||
with Timer("post-process") as t:
|
||||
result = result.astype(np.int16)
|
||||
if self.settings.inputSampleRate != processing_sampling_rate:
|
||||
outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
|
||||
else:
|
||||
outputData = result
|
||||
|
||||
# 後処理
|
||||
with Timer("post-process") as t:
|
||||
result = result.astype(np.int16)
|
||||
if self.settings.inputSampleRate != processing_sampling_rate:
|
||||
outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
|
||||
else:
|
||||
outputData = result
|
||||
print_convert_processing(
|
||||
f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
|
||||
|
||||
print_convert_processing(
|
||||
f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
|
||||
if self.settings.recordIO == 1:
|
||||
self.ioRecorder.writeInput(receivedData)
|
||||
self.ioRecorder.writeOutput(outputData.tobytes())
|
||||
|
||||
if self.settings.recordIO == 1:
|
||||
self.ioRecorder.writeInput(receivedData)
|
||||
self.ioRecorder.writeOutput(outputData.tobytes())
|
||||
# if receivedData.shape[0] != outputData.shape[0]:
|
||||
# print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
|
||||
# outputData = pad_array(outputData, receivedData.shape[0])
|
||||
# # print_convert_processing(
|
||||
# # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
|
||||
postprocess_time = t.secs
|
||||
|
||||
# if receivedData.shape[0] != outputData.shape[0]:
|
||||
# print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
|
||||
# outputData = pad_array(outputData, receivedData.shape[0])
|
||||
# # print_convert_processing(
|
||||
# # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
|
||||
postprocess_time = t.secs
|
||||
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
|
||||
perf = [preprocess_time, mainprocess_time, postprocess_time]
|
||||
return outputData, perf
|
||||
|
||||
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
|
||||
perf = [preprocess_time, mainprocess_time, postprocess_time]
|
||||
return outputData, perf
|
||||
except NoModeLoadedException as e:
|
||||
print("[Voice Changer] [Exception]", e)
|
||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||
except Exception as e:
|
||||
print("VC PROCESSING!!!! EXCEPTION!!!", e)
|
||||
print(traceback.format_exc())
|
||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||
|
||||
def export2onnx(self):
    """Delegate to ``self.voiceChanger.export2onnx()`` and return its result."""
    exported = self.voiceChanger.export2onnx()
    return exported
|
||||
|
Loading…
Reference in New Issue
Block a user