WIP: support so-vits-svc, apply hop size to convert size

This commit is contained in:
wataru 2023-03-11 04:02:40 +09:00
parent d7cff0709f
commit a5c7414fc3
2 changed files with 8 additions and 4 deletions

View File

@ -137,6 +137,9 @@ class SoVitsSvc40v2:
def get_processing_sampling_rate(self):
    """Return the sampling rate (Hz) the model processes audio at.

    Reads the value from the loaded hyperparameter config (``self.hps.data``).
    """
    data_cfg = self.hps.data
    return data_cfg.sampling_rate
def get_processing_hop_length(self):
    """Return the model's output hop length (samples per frame).

    Reads the value from the loaded hyperparameter config (``self.hps.data``);
    callers use it to round conversion buffer sizes to a frame boundary.
    """
    data_cfg = self.hps.data
    return data_cfg.hop_length
def get_unit_f0(self, audio_buffer, tran):
wav_44k = audio_buffer
# f0 = utils.compute_f0_parselmouth(wav, sampling_rate=self.target_sample, hop_length=self.hop_size)

View File

@ -147,6 +147,8 @@ class VoiceChanger():
# receivedData: tuple of short
def on_request(self, receivedData: any):
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
processing_hop_length = self.voiceChanger.get_processing_hop_length()
print_convert_processing(f"------------ Convert processing.... ------------")
# 前処理
with Timer("pre-process") as t:
@ -163,10 +165,9 @@ class VoiceChanger():
if convertSize < 8192:
convertSize = 8192
# if convertSize % 128 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
# convertSize = convertSize + (128 - (convertSize % 128))
if convertSize % 512 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
convertSize = convertSize + (512 - (convertSize % 512))
if convertSize % processing_hop_length != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
convertSize = convertSize + (processing_hop_length - (convertSize % processing_hop_length))
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
cropRange = (-1 * (inputSize + overlapSize), -1 * overlapSize)