mirror of https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00

apply sola for all vc forcely

This commit is contained in:
parent b3a95b2f7e
commit d85bbae478
@@ -145,10 +145,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -147,10 +147,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -166,10 +166,6 @@
             {
                 "name": "rvcQuality",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -157,10 +157,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -157,10 +157,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -145,10 +145,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -147,10 +147,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -166,10 +166,6 @@
             {
                 "name": "rvcQuality",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -157,10 +157,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },

@@ -157,10 +157,6 @@
             {
                 "name": "trancateNumThreshold",
                 "options": {}
-            },
-            {
-                "name": "solaEnable",
-                "options": {}
             }
         ]
     },
@@ -130,7 +130,7 @@ class MMVCv13:
         spec = torch.squeeze(spec, 0)
         return spec
 
-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaEnabled: bool = False, solaSearchFrame: int = 0):
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if hasattr(self, "audio_buffer"):
@@ -138,10 +138,7 @@ class MMVCv13:
         else:
             self.audio_buffer = newData
 
-        if solaEnabled:
-            convertSize = inputSize + crossfadeSize + solaSearchFrame
-        else:
-            convertSize = inputSize + crossfadeSize
+        convertSize = inputSize + crossfadeSize + solaSearchFrame
 
         if convertSize < 8192:
             convertSize = 8192
@@ -160,32 +157,6 @@ class MMVCv13:
 
         return data
 
-    def generate_input_old(self, newData: any, inputSize: int, crossfadeSize: int):
-        newData = newData.astype(np.float32) / self.hps.data.max_wav_value
-
-        if hasattr(self, "audio_buffer"):
-            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
-        else:
-            self.audio_buffer = newData
-
-        convertSize = inputSize + crossfadeSize
-        if convertSize < 8192:
-            convertSize = 8192
-        if convertSize % self.hps.data.hop_length != 0:  # pad so the model's output hop size does not truncate the data
-            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
-
-        self.audio_buffer = self.audio_buffer[-1 * convertSize:]  # extract only the part to be converted
-
-        audio = torch.FloatTensor(self.audio_buffer)
-        audio_norm = audio.unsqueeze(0)  # unsqueeze
-        spec = self._get_spec(audio_norm)
-        sid = torch.LongTensor([int(self.settings.srcId)])
-
-        data = (self.text_norm, spec, audio_norm, sid)
-        data = TextAudioSpeakerCollate()([data])
-
-        return data
-
     def _onnx_inference(self, data):
         if hasattr(self, "onnx_session") == False or self.onnx_session == None:
             print("[Voice Changer] No ONNX session.")
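The MMVCv13 hunks above, and the MMVCv15, RVC, SoVitsSvc40 and SoVitsSvc40v2 hunks that follow, all make the same change: generate_input() drops the solaEnabled parameter and always reserves room for the SOLA search window when sizing its conversion buffer. A minimal, self-contained sketch of that sizing rule follows; the helper name and default values are illustrative, not code from the commit.

def compute_convert_size(input_size: int, crossfade_size: int, sola_search_frame: int,
                         extra_convert_size: int = 0, hop_length: int = 128, min_size: int = 8192) -> int:
    # Always include the SOLA search frame, matching the post-commit behaviour.
    convert_size = input_size + crossfade_size + sola_search_frame + extra_convert_size
    # Some models (the MMVC variants in the diff) enforce a minimum window of 8192 samples.
    if convert_size < min_size:
        convert_size = min_size
    # Round up to a multiple of the model's hop size so the output is not truncated.
    if convert_size % hop_length != 0:
        convert_size += hop_length - (convert_size % hop_length)
    return convert_size

# Example: a 4096-sample block with a 1024-sample crossfade and a 256-sample SOLA search
# window is padded up to the 8192 floor.
print(compute_convert_size(4096, 1024, 256))  # -> 8192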
@@ -166,7 +166,7 @@ class MMVCv15:
         spec = torch.squeeze(spec, 0)
         return spec
 
-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaEnabled: bool = False, solaSearchFrame: int = 0):
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if hasattr(self, "audio_buffer"):
@@ -174,10 +174,7 @@ class MMVCv15:
         else:
             self.audio_buffer = newData
 
-        if solaEnabled:
-            convertSize = inputSize + crossfadeSize + solaSearchFrame
-        else:
-            convertSize = inputSize + crossfadeSize
+        convertSize = inputSize + crossfadeSize + solaSearchFrame
 
         if convertSize < 8192:
             convertSize = 8192
@@ -165,7 +165,7 @@ class RVC:
     def get_processing_sampling_rate(self):
         return self.settings.modelSamplingRate
 
-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaEnabled: bool = False, solaSearchFrame: int = 0):
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
         newData = newData.astype(np.float32) / 32768.0
 
         if hasattr(self, "audio_buffer"):
@@ -173,10 +173,7 @@ class RVC:
         else:
             self.audio_buffer = newData
 
-        if solaEnabled:
-            convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
-        else:
-            convertSize = inputSize + crossfadeSize + self.settings.extraConvertSize
+        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
 
         if convertSize % 128 != 0:  # pad so the model's output hop size does not truncate the data
             convertSize = convertSize + (128 - (convertSize % 128))
@@ -188,30 +185,6 @@ class RVC:
         vol = max(rms, self.prevVol * 0.0)
         self.prevVol = vol
 
-        return (self.audio_buffer, convertSize, vol, solaEnabled)
-
-    def generate_input_old(self, newData: any, inputSize: int, crossfadeSize: int):
-        newData = newData.astype(np.float32) / 32768.0
-
-        if hasattr(self, "audio_buffer"):
-            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
-        else:
-            self.audio_buffer = newData
-
-        convertSize = inputSize + crossfadeSize + self.settings.extraConvertSize
-
-        # if convertSize % self.hps.data.hop_length != 0:  # pad so the model's output hop size does not truncate the data
-        if convertSize % 128 != 0:  # pad so the model's output hop size does not truncate the data
-            # convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
-            convertSize = convertSize + (128 - (convertSize % 128))
-
-        self.audio_buffer = self.audio_buffer[-1 * convertSize:]  # extract only the part to be converted
-
-        crop = self.audio_buffer[-1 * (inputSize + crossfadeSize):-1 * (crossfadeSize)]
-        rms = np.sqrt(np.square(crop).mean(axis=0))
-        vol = max(rms, self.prevVol * 0.0)
-        self.prevVol = vol
-
         return (self.audio_buffer, convertSize, vol)
 
     def _onnx_inference(self, data):
@@ -302,12 +275,7 @@ class RVC:
         else:
             audio = self._pyTorch_inference(data)
 
-        sola_enabled = data[3]
-        if sola_enabled:
-            return audio
-            # return audio[self.settings.extraConvertSize:]
-        else:
-            return audio
+        return audio
 
     def __del__(self):
         del self.net_g
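The RVC hunks also drop the fourth solaEnabled element from the tuple that generate_input() returns, so inference() no longer branches on it. The rolling buffer and RMS volume estimate that the method keeps are unchanged; the sketch below distills that part from the code visible in the diff into a standalone class. It is an illustration under those assumptions, not a drop-in replacement for the repository code.

import numpy as np

class InputBuffer:
    """Rolling input buffer plus RMS volume estimate, in the style of RVC.generate_input."""

    def __init__(self) -> None:
        self.audio_buffer = np.zeros(0, dtype=np.float32)
        self.prev_vol = 0.0

    def push(self, new_data: np.ndarray, input_size: int, crossfade_size: int, convert_size: int):
        # Append the new block to the history and keep only the tail that will be converted.
        self.audio_buffer = np.concatenate([self.audio_buffer, new_data], 0)[-convert_size:]
        # RMS of the most recent block (excluding the crossfade tail) drives the output volume.
        crop = self.audio_buffer[-(input_size + crossfade_size):-crossfade_size]
        rms = float(np.sqrt(np.square(crop).mean(axis=0)))
        vol = max(rms, self.prev_vol * 0.0)  # the * 0.0 mirrors the original code
        self.prev_vol = vol
        return self.audio_buffer, convert_size, vol

# Toy usage: push one int16-scaled block and read back the volume estimate.
buf = InputBuffer()
block = np.random.randn(4096).astype(np.float32) / 32768.0
print(buf.push(block, 4096, 1024, 8192)[2])  # small positive RMS value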
@@ -252,7 +252,7 @@ class SoVitsSvc40:
         c = c.unsqueeze(0)
         return c, f0, uv
 
-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaEnabled: bool = False, solaSearchFrame: int = 0):
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if hasattr(self, "audio_buffer"):
@@ -260,10 +260,7 @@ class SoVitsSvc40:
         else:
             self.audio_buffer = newData
 
-        if solaEnabled:
-            convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
-        else:
-            convertSize = inputSize + crossfadeSize + self.settings.extraConvertSize
+        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
 
         if convertSize % self.hps.data.hop_length != 0:  # pad so the model's output hop size does not truncate the data
            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
@@ -215,7 +215,7 @@ class SoVitsSvc40v2:
         c = c.unsqueeze(0)
         return c, f0, uv
 
-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaEnabled: bool = False, solaSearchFrame: int = 0):
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if hasattr(self, "audio_buffer"):
@@ -223,10 +223,7 @@ class SoVitsSvc40v2:
         else:
             self.audio_buffer = newData
 
-        if solaEnabled:
-            convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
-        else:
-            convertSize = inputSize + crossfadeSize + self.settings.extraConvertSize
+        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
 
         if convertSize % self.hps.data.hop_length != 0:  # pad so the model's output hop size does not truncate the data
             convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
@@ -274,8 +271,6 @@ class SoVitsSvc40v2:
 
         return result
 
-        pass
-
     def _pyTorch_inference(self, data):
         if hasattr(self, "net_g") == False or self.net_g == None:
             print("[Voice Changer] No pyTorch session.")
@@ -208,13 +208,9 @@ class VoiceChanger():
 
     # receivedData: tuple of short
     def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
-        if self.settings.solaEnabled:
-            return self.on_request_sola(receivedData)
-        else:
-            return self.on_request_legacy(receivedData)
+        return self.on_request_sola(receivedData)
 
     def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
-        print("processing with sola")
         processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
 
         # preprocessing
@@ -230,7 +226,7 @@ class VoiceChanger():
             crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
             self._generate_strength(crossfade_frame)
 
-            data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, True, sola_search_frame)
+            data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
         preprocess_time = t.secs
 
         # conversion
@@ -295,110 +291,10 @@ class VoiceChanger():
         perf = [preprocess_time, mainprocess_time, postprocess_time]
         return outputData, perf
 
-    def on_request_legacy(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
-        # print("processing with legacy")
-
-        processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
-        print_convert_processing(f"------------ Convert processing.... ------------")
-        # preprocessing
-        with Timer("pre-process") as t:
-
-            with Timer("pre-process") as t1:
-
-                if self.settings.inputSampleRate != processing_sampling_rate:
-                    newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
-                else:
-                    newData = receivedData
-            # print("t1::::", t1.secs)
-            inputSize = newData.shape[0]
-            crossfadeSize = min(self.settings.crossFadeOverlapSize, inputSize)
-
-            print_convert_processing(
-                f" Input data size: {receivedData.shape[0]}/{self.settings.inputSampleRate}hz {inputSize}/{processing_sampling_rate}hz")
-            print_convert_processing(
-                f" Crossfade data size: crossfade:{crossfadeSize}, crossfade setting:{self.settings.crossFadeOverlapSize}, input size:{inputSize}")
-
-            print_convert_processing(f" Convert data size of {inputSize + crossfadeSize} (+ extra size)")
-            print_convert_processing(f" will be cropped:{-1 * (inputSize + crossfadeSize)}, {-1 * (crossfadeSize)}")
-
-            self._generate_strength(crossfadeSize)
-            with Timer("pre-process") as t2:
-                data = self.voiceChanger.generate_input(newData, inputSize, crossfadeSize)
-            # print("t2::::", t2.secs)
-        preprocess_time = t.secs
-
-        # conversion
-        with Timer("main-process") as t:
-            try:
-                # Inference
-                audio = self.voiceChanger.inference(data)
-
-                if hasattr(self, 'np_prev_audio1') == True:
-                    np.set_printoptions(threshold=10000)
-                    prev_overlap_start = -1 * crossfadeSize
-                    prev_overlap = self.np_prev_audio1[prev_overlap_start:]
-                    cur_overlap_start = -1 * (inputSize + crossfadeSize)
-                    cur_overlap_end = -1 * inputSize
-                    cur_overlap = audio[cur_overlap_start:cur_overlap_end]
-                    print_convert_processing(
-                        f" audio:{audio.shape}, prev_overlap:{prev_overlap.shape}, self.np_prev_strength:{self.np_prev_strength.shape}")
-                    powered_prev = prev_overlap * self.np_prev_strength
-                    print_convert_processing(
-                        f" audio:{audio.shape}, cur_overlap:{cur_overlap.shape}, self.np_cur_strength:{self.np_cur_strength.shape}")
-                    print_convert_processing(f" cur_overlap_strt:{cur_overlap_start}, cur_overlap_end{cur_overlap_end}")
-
-                    powered_cur = cur_overlap * self.np_cur_strength
-                    powered_result = powered_prev + powered_cur
-
-                    cur = audio[-1 * inputSize:-1 * crossfadeSize]
-                    result = np.concatenate([powered_result, cur], axis=0)
-                    print_convert_processing(
-                        f" overlap:{crossfadeSize}, current:{cur.shape[0]}, result:{result.shape[0]}... result should be same as input")
-                    if cur.shape[0] != result.shape[0]:
-                        print_convert_processing(f" current and result should be same as input")
-
-                else:
-                    result = np.zeros(4096).astype(np.int16)
-                self.np_prev_audio1 = audio
-
-            except Exception as e:
-                print("VC PROCESSING!!!! EXCEPTION!!!", e)
-                print(traceback.format_exc())
-                if hasattr(self, "np_prev_audio1"):
-                    del self.np_prev_audio1
-                return np.zeros(1).astype(np.int16), [0, 0, 0]
-        mainprocess_time = t.secs
-
-        # post-processing
-        with Timer("post-process") as t:
-            result = result.astype(np.int16)
-            if self.settings.inputSampleRate != processing_sampling_rate:
-                outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
-            else:
-                outputData = result
-            # outputData = result
-
-            print_convert_processing(
-                f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
-
-            if self.settings.recordIO == 1:
-                self.ioRecorder.writeInput(receivedData)
-                self.ioRecorder.writeOutput(outputData.tobytes())
-
-            # if receivedData.shape[0] != outputData.shape[0]:
-            #     print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
-            #     outputData = pad_array(outputData, receivedData.shape[0])
-            #     # print_convert_processing(
-            #     #     f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
-        postprocess_time = t.secs
-
-        print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
-        perf = [preprocess_time, mainprocess_time, postprocess_time]
-        return outputData, perf
-
     def export2onnx(self):
         return self.voiceChanger.export2onnx()
 
 
 ##############
 PRINT_CONVERT_PROCESSING: bool = False
 # PRINT_CONVERT_PROCESSING = True
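Taken together: the solaEnable entry disappears from the client setting JSONs (the hunks at the top), the solaEnabled parameter disappears from every generate_input() signature, and on_request() now always routes to on_request_sola() while the legacy crossfade path is deleted. on_request_sola() itself is untouched by this commit and therefore does not appear above; the sketch below only illustrates the kind of overlap search and crossfade a SOLA stage performs on the extra solaSearchFrame samples that generate_input() now always reserves. Function names, the linear fade, and the correlation metric are illustrative assumptions, not code from the repository.

import numpy as np

def sola_align(prev_tail: np.ndarray, new_audio: np.ndarray, crossfade: int, search: int) -> int:
    """Return the offset (0..search) at which new_audio best continues prev_tail."""
    best_offset, best_corr = 0, -np.inf
    for offset in range(search + 1):
        cand = new_audio[offset:offset + crossfade]
        # Normalized cross-correlation between the previous tail and the candidate window.
        corr = float(np.dot(prev_tail, cand) / (np.linalg.norm(cand) + 1e-8))
        if corr > best_corr:
            best_corr, best_offset = corr, offset
    return best_offset

def sola_merge(prev_tail: np.ndarray, new_audio: np.ndarray, crossfade: int, search: int) -> np.ndarray:
    # Shift the new block to the best-matching offset, then crossfade it onto the previous tail.
    offset = sola_align(prev_tail, new_audio, crossfade, search)
    fade_in = np.linspace(0.0, 1.0, crossfade, dtype=np.float32)
    head = new_audio[offset:offset + crossfade]
    blended = prev_tail * (1.0 - fade_in) + head * fade_in
    return np.concatenate([blended, new_audio[offset + crossfade:]])

# Toy usage: align a sine block against the tail of the previous output.
t = np.arange(2048, dtype=np.float32)
prev_tail = np.sin(0.05 * t[-256:])
new_audio = np.sin(0.05 * (t + 7))  # same signal, shifted by a few samples
out = sola_merge(prev_tail, new_audio, crossfade=256, search=64)
print(out.shape)  # (2048 - offset,) after trimming the search slack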