diff --git a/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py b/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py
index ac1d16c8..6168a357 100644
--- a/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py
+++ b/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py
@@ -137,6 +137,9 @@ class SoVitsSvc40v2:
     def get_processing_sampling_rate(self):
         return self.hps.data.sampling_rate
 
+    def get_processing_hop_length(self):
+        return self.hps.data.hop_length
+
     def get_unit_f0(self, audio_buffer, tran):
         wav_44k = audio_buffer
         # f0 = utils.compute_f0_parselmouth(wav, sampling_rate=self.target_sample, hop_length=self.hop_size)
diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py
index 2e2511ed..4b80af80 100755
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@@ -147,6 +147,8 @@ class VoiceChanger():
     #  receivedData: tuple of short
     def on_request(self, receivedData: any):
         processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
+        processing_hop_length = self.voiceChanger.get_processing_hop_length()
+
         print_convert_processing(f"------------ Convert processing.... ------------")
         # 前処理
         with Timer("pre-process") as t:
@@ -163,10 +165,9 @@ class VoiceChanger():
 
             if convertSize < 8192:
                 convertSize = 8192
-            # if convertSize % 128 != 0:  # モデルの出力のホップサイズで切り捨てが発生するので補う。
-            #     convertSize = convertSize + (128 - (convertSize % 128))
-            if convertSize % 512 != 0:  # モデルの出力のホップサイズで切り捨てが発生するので補う。
-                convertSize = convertSize + (512 - (convertSize % 512))
+
+            if convertSize % processing_hop_length != 0:  # モデルの出力のホップサイズで切り捨てが発生するので補う。
+                convertSize = convertSize + (processing_hop_length - (convertSize % processing_hop_length))
 
             overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
             cropRange = (-1 * (inputSize + overlapSize), -1 * overlapSize)