Merge pull request #1077 from icecoins/master

Implement the FCPE pitch extractor in RVC
2025-01-23 05:25:01 +03:00 · 2024-01-18 06:32:12 +09:00 · 2024-01-18 06:32:12 +09:00 · 927bba6467
commit 927bba6467
parent 41238258ba 8f230e5c45
7 changed files with 52 additions and 2 deletions
--- a/client/demo/dist/assets/gui_settings/GUI.json
+++ b/client/demo/dist/assets/gui_settings/GUI.json
@ -21,7 +21,7 @@
            {
                "name": "configArea",
                "options": {
-                    "detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx"],
+                    "detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx", "fcpe" ],
                    "inputChunkNums": [1, 2, 4, 6, 8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048, 4096, 8192, 16384]
                }
            }
--- a/client/demo/public/assets/gui_settings/GUI.json
+++ b/client/demo/public/assets/gui_settings/GUI.json
@ -21,7 +21,7 @@
            {
                "name": "configArea",
                "options": {
-                    "detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx"],
+                    "detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx", "fcpe"],
                    "inputChunkNums": [1, 2, 4, 6, 8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048, 4096, 8192, 16384]
                }
            }
--- a/client/lib/src/const.ts
+++ b/client/lib/src/const.ts
@ -56,6 +56,7 @@ export const F0Detector = {
    crepe_tiny: "crepe_tiny",
    rmvpe: "rmvpe",
    rmvpe_onnx: "rmvpe_onnx",
+    fcpe: "fcpe",
 } as const;
 export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector];

--- a/server/const.py
+++ b/server/const.py
@ -82,6 +82,7 @@ PitchExtractorType: TypeAlias = Literal[
    "crepe_tiny",
    "rmvpe",
    "rmvpe_onnx",
+    "fcpe",
 ]

 ServerAudioDeviceType: TypeAlias = Literal["audioinput", "audiooutput"]
--- a/server/requirements.txt
+++ b/server/requirements.txt
@ -27,3 +27,4 @@ websockets==11.0.2
 sounddevice==0.4.6
 dataclasses_json==0.5.7
 onnxsim==0.4.28
+torchfcpe
--- a/server/voice_changer/RVC/pitchExtractor/FcpePitchExtractor.py
+++ b/server/voice_changer/RVC/pitchExtractor/FcpePitchExtractor.py
@ -0,0 +1,44 @@
+import numpy as np
+from const import PitchExtractorType
+from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
+from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
+import torchfcpe
+
+class FcpePitchExtractor(PitchExtractor):
+    """Pitch (f0) extractor that delegates to the bundled torchfcpe model."""
+
+    def __init__(self, gpu: int):
+        """Resolve the device for ``gpu`` and load the bundled FCPE model on it."""
+        super().__init__()
+        self.pitchExtractorType: PitchExtractorType = "fcpe"
+        self.device = DeviceManager.get_instance().getDevice(gpu)
+        self.fcpe = torchfcpe.spawn_bundled_infer_model(self.device)
+
+    # Adapted from the CrepePitchExtractor in this project and the FCPE
+    # inference code in RVC.
+    def extract(self, audio, pitchf, f0_up_key, sr, window, silence_front=0):
+        """Estimate f0 for ``audio`` and write it into the tail of ``pitchf``.
+
+        Args:
+            audio: 1-D torch tensor of samples (it is moved to the model
+                device and given a batch dimension before inference).
+            pitchf: numpy buffer updated in place; its last frames are
+                overwritten with the newly estimated f0.
+            f0_up_key: pitch shift in semitones applied to the estimate.
+            sr: sample rate of ``audio``.
+            window: frame length in samples, used to align the skipped
+                silence to a frame boundary.
+            silence_front: seconds at the start of ``audio`` to skip.
+
+        Returns:
+            ``(pitch_coarse, pitchf)`` where ``pitch_coarse`` is ``pitchf``
+            mapped to the mel scale and quantised to integers in 1..255.
+        """
+        # Round the skipped silence down to a whole number of frames.
+        start_frame = int(silence_front * sr / window)
+        real_silence_front = start_frame * window / sr
+
+        silence_front_offset = int(np.round(real_silence_front * sr))
+        audio = audio[silence_front_offset:]
+
+        f0_min = 50
+        f0_max = 1100
+        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+        f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+
+        # Pass the caller's sample rate instead of a hard-coded 16000 so the
+        # model sees the same rate that was used for the silence trimming.
+        f0 = self.fcpe.infer(
+            audio.to(self.device).unsqueeze(0).float(),
+            sr=sr,
+            decoder_mode="local_argmax",
+            threshold=0.006,
+        )
+        # Flatten rather than squeeze() so a single-frame result stays 1-D
+        # and ``f0.shape[0]`` is always valid.
+        f0 = f0.reshape(-1).detach().cpu().numpy()
+
+        f0 = f0 * pow(2, f0_up_key / 12)
+        # ``pitchf[-0:]`` would select the whole buffer, so skip empty results.
+        if f0.shape[0] > 0:
+            pitchf[-f0.shape[0]:] = f0[:pitchf.shape[0]]
+        f0bak = pitchf.copy()
+        f0_mel = 1127.0 * np.log(1.0 + f0bak / 700.0)
+        f0_mel = np.clip(
+            (f0_mel - f0_mel_min) * 254.0 / (f0_mel_max - f0_mel_min) + 1.0, 1.0, 255.0
+        )
+        pitch_coarse = f0_mel.astype(int)
+        return pitch_coarse, pitchf
--- a/server/voice_changer/RVC/pitchExtractor/PitchExtractorManager.py
+++ b/server/voice_changer/RVC/pitchExtractor/PitchExtractorManager.py
@ -43,6 +43,9 @@ class PitchExtractorManager(Protocol):
            return RMVPEPitchExtractor(cls.params.rmvpe, gpu)
        elif pitchExtractorType == "rmvpe_onnx":
            return RMVPEOnnxPitchExtractor(cls.params.rmvpe_onnx, gpu)
+        elif pitchExtractorType == "fcpe":
+            # add the FcpePitchExtractor
+            return FcpePitchExtractor(gpu)
        else:
            # return hubert as default
            print("[Voice Changer] PitchExctractor not found", pitchExtractorType)