add sampling rate to rvc

2025-01-23 13:35:12 +03:00 · 2023-04-08 04:39:04 +09:00 · 2023-04-08 04:39:04 +09:00 · 3fefc528ab
commit 3fefc528ab
parent 36c6798f14
8 changed files with 158 additions and 19 deletions
--- a/client/demo/dist/assets/gui_settings/RVC.json
+++ b/client/demo/dist/assets/gui_settings/RVC.json
@ -51,6 +51,10 @@
                "options": {
                    "showFramework": true
                }
+            },
+            {
+                "name": "modelSamplingRate",
+                "options": {}
            }
        ],
        "deviceSetting": [
@ -160,6 +164,10 @@
            {
                "name": "trancateNumThreshold",
                "options": {}
+            },
+            {
+                "name": "rvcQuality",
+                "options": {}
            }
        ]
    },
--- a/client/demo/dist/index.js
+++ b/client/demo/dist/index.js
--- a/client/demo/public/assets/gui_settings/RVC.json
+++ b/client/demo/public/assets/gui_settings/RVC.json
@ -51,6 +51,10 @@
                "options": {
                    "showFramework": true
                }
+            },
+            {
+                "name": "modelSamplingRate",
+                "options": {}
            }
        ],
        "deviceSetting": [
@ -160,6 +164,10 @@
            {
                "name": "trancateNumThreshold",
                "options": {}
+            },
+            {
+                "name": "rvcQuality",
+                "options": {}
            }
        ]
    },
--- a/client/demo/src/components/demo/002_ComponentGenerator.tsx
+++ b/client/demo/src/components/demo/002_ComponentGenerator.tsx
@ -34,6 +34,8 @@ import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_
 import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
 import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
 import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
+import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQuality"
+import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"

 export const catalog: { [key: string]: (props: any) => JSX.Element } = {}

@ -61,6 +63,7 @@ const initialize = () => {

    addToCatalog("modelUploader", (props: ModelUploaderRowProps) => { return <ModelUploaderRow {...props} /> })
    addToCatalog("framework", (props: FrameworkRowProps) => { return <FrameworkRow {...props} /> })
+    addToCatalog("modelSamplingRate", (props: ModelSamplingRateRowProps) => { return <ModelSamplingRateRow {...props} /> })


    addToCatalog("audioInput", (props: AudioInputRowProps) => { return <AudioInputRow {...props} /> })
@ -96,6 +99,8 @@ const initialize = () => {
    addToCatalog("crossFadeEndRate", (props: CrossFadeEndRateRowProps) => { return <CrossFadeEndRateRow {...props} /> })
    addToCatalog("downSamplingMode", (props: DownSamplingModeRowProps) => { return <DownSamplingModeRow {...props} /> })
    addToCatalog("trancateNumThreshold", (props: TrancateNumTresholdRowProps) => { return <TrancateNumTresholdRow {...props} /> })
+    addToCatalog("rvcQuality", (props: RVCQualityRowProps) => { return <RVCQualityRow {...props} /> })
+


 }
--- a/client/demo/src/components/demo/components/303_ModelSamplingRateRow.tsx
+++ b/client/demo/src/components/demo/components/303_ModelSamplingRateRow.tsx
@ -0,0 +1,43 @@
+import React, { useMemo } from "react"
+import { fileSelector, ModelSamplingRate } from "@dannadori/voice-changer-client-js"
+import { useAppState } from "../../../001_provider/001_AppStateProvider"
+
+/** Props for ModelSamplingRateRow; the row currently takes no options. */
+export type ModelSamplingRateRowProps = {
+}
+
+/**
+ * Settings row with a dropdown listing the ModelSamplingRate values; the
+ * chosen value is passed to updateServerSettings as `modelSamplingRate`.
+ */
+export const ModelSamplingRateRow = (_props: ModelSamplingRateRowProps) => {
+    const appState = useAppState()
+
+    const modelSamplingRateRow = useMemo(() => {
+        const onModelSamplingRateChanged = (val: ModelSamplingRate) => {
+            appState.serverSetting.updateServerSettings({
+                ...appState.serverSetting.serverSetting,
+                modelSamplingRate: val
+            })
+        }
+
+        return (
+            <div className="body-row split-3-3-4 left-padding-1 guided">
+                <div className="body-item-title left-padding-2">Model Sampling Rate</div>
+                <div className="body-item-text">
+                    <div></div>
+                </div>
+                <div className="body-button-container">
+                    <select className="body-select" value={appState.serverSetting.serverSetting.modelSamplingRate} onChange={(e) => {
+                        // A <select> reports its value as a string; convert so the setting stays numeric.
+                        onModelSamplingRateChanged(Number(e.target.value) as ModelSamplingRate)
+                    }}>
+                        {Object.values(ModelSamplingRate).map(x => <option key={x} value={x}>{x}</option>)}
+                    </select>
+                </div>
+            </div>
+        )
+    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])
+
+    return modelSamplingRateRow
+}
--- a/client/demo/src/components/demo/components/810_RVCQuality.tsx
+++ b/client/demo/src/components/demo/components/810_RVCQuality.tsx
@ -0,0 +1,31 @@
+import React, { useMemo } from "react"
+import { useAppState } from "../../../001_provider/001_AppStateProvider"
+
+/** Props for RVCQualityRow; the row currently takes no options. */
+export type RVCQualityRowProps = {
+}
+
+/** Settings row with a low/high dropdown; the choice is sent as `rvcQuality` (0 or 1). */
+export const RVCQualityRow = (_props: RVCQualityRowProps) => {
+    const appState = useAppState()
+    const rvcQualityRow = useMemo(() => {
+        const onRVCQualityChanged = (val: number) => {
+            appState.serverSetting.updateServerSettings({
+                ...appState.serverSetting.serverSetting,
+                rvcQuality: val
+            })
+        }
+        return (
+            <div className="body-row split-3-7 left-padding-1 guided">
+                <div className="body-item-title left-padding-1">RVC Quality</div>
+                <div className="body-input-container">
+                    <select value={appState.serverSetting.serverSetting.rvcQuality} onChange={(e) => { onRVCQualityChanged(Number(e.target.value)) }}>
+                        <option value="0" >low</option>
+                        <option value="1" >high</option>
+                    </select>
+                </div>
+            </div>
+        )
+    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])
+    return rvcQualityRow
+}
--- a/client/lib/src/const.ts
+++ b/client/lib/src/const.ts
@ -25,6 +25,14 @@ export const InputSampleRate = {
 } as const
 export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]

+// Selectable values for the `modelSamplingRate` server setting; the type is the union of these values.
+export const ModelSamplingRate = {
+    "48000": 48000,
+    "40000": 40000,
+    "32000": 32000
+} as const
+export type ModelSamplingRate = typeof ModelSamplingRate[keyof typeof ModelSamplingRate]
+
 export const CrossFadeOverlapSize = {
    "1024": 1024,
    "2048": 2048,
@ -79,6 +87,9 @@ export const ServerSettingKey = {
    "clusterInferRatio": "clusterInferRatio",

    "indexRatio": "indexRatio",
+    "rvcQuality": "rvcQuality",
+    "modelSamplingRate": "modelSamplingRate",
+

    "inputSampleRate": "inputSampleRate",
 } as const
@ -109,6 +120,8 @@ export type VoiceChangerServerSetting = {
    clusterInferRatio: number // so-vits-svc

    indexRatio: number // RVC
+    rvcQuality: number // 0:low, 1:high
+    modelSamplingRate: ModelSamplingRate // 32000,40000,48000

    inputSampleRate: InputSampleRate
 }
@ -147,6 +160,8 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
    clusterInferRatio: 0,

    indexRatio: 0,
+    rvcQuality: 0,
+    modelSamplingRate: 48000,

    inputSampleRate: 24000,

@ -181,6 +196,9 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
    clusterInferRatio: 0,

    indexRatio: 0,
+    rvcQuality: 0,
+    modelSamplingRate: 48000,
+

    inputSampleRate: 24000,

@ -219,6 +237,9 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
+    rvcQuality: 0,
+    modelSamplingRate: 48000,
+

    inputSampleRate: 24000,

@ -257,6 +278,9 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
+    rvcQuality: 0,
+    modelSamplingRate: 48000,
+

    inputSampleRate: 24000,

@ -294,6 +318,9 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
+    rvcQuality: 0,
+    modelSamplingRate: 48000,
+

    inputSampleRate: 24000,

@ -333,6 +360,8 @@ export const DefaultServerSetting_RVC: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
+    rvcQuality: 0,
+    modelSamplingRate: 48000,

    inputSampleRate: 24000,

--- a/server/voice_changer/RVC/RVC.py
+++ b/server/voice_changer/RVC/RVC.py
@ -28,7 +28,7 @@ from const import HUBERT_ONNX_MODEL_PATH
 import pyworld as pw

 from vc_infer_pipeline import VC
-from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
+from infer_pack.models import SynthesizerTrnMs256NSFsid
 from fairseq import checkpoint_utils
 providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]

@ -52,14 +52,15 @@ class RVCSettings():
    configFile: str = ""

    indexRatio: float = 0
-    quality: int = 0
+    rvcQuality: int = 0
+    modelSamplingRate: int = 48000

    speakers: dict[str, int] = field(
        default_factory=lambda: {}
    )

    # ↓mutableな物だけ列挙
-    intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "quality"]
+    intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "rvcQuality", "modelSamplingRate"]
    floatData = ["noiceScale", "silentThreshold", "indexRatio"]
    strData = ["framework", "f0Detector"]

@ -82,7 +83,6 @@ class RVC:
        self.index_file = index_file
        self.is_half = is_half

-        self.tgt_sr = 40000
        try:
            hubert_path = self.params["hubert"]
            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
@ -103,7 +103,7 @@ class RVC:
        # PyTorchモデル生成
        if pyTorch_model_file != None:
            cpt = torch.load(pyTorch_model_file, map_location="cpu")
-            self.tgt_sr = cpt["config"][-1]
+            self.settings.tgt_sr = cpt["config"][-1]
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
            net_g.eval()
            net_g.load_state_dict(cpt["weight"], strict=False)
@ -113,14 +113,7 @@ class RVC:

        # ONNXモデル生成
        if onnx_model_file != None:
-            # self.onnx_session = ModelWrapper(onnx_model_file, is_half=True)
            self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.is_half)
-            # input_info = self.onnx_session.get_inputs()
-            # for i in input_info:
-            #     print("input", i)
-            # output_info = self.onnx_session.get_outputs()
-            # for i in output_info:
-            #     print("output", i)
        return self.get_info()

    def update_setteings(self, key: str, val: any):
@ -170,7 +163,7 @@ class RVC:
        return data

    def get_processing_sampling_rate(self):
-        return self.tgt_sr
+        return self.settings.tgt_sr
        # return 24000

    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
@ -215,13 +208,13 @@ class RVC:
        convertSize = data[1]
        vol = data[2]

-        audio = resampy.resample(audio, self.tgt_sr, 16000)
+        audio = resampy.resample(audio, self.settings.tgt_sr, 16000)

        if vol < self.settings.silentThreshold:
            return np.zeros(convertSize).astype(np.int16)

        with torch.no_grad():
-            vc = VC(self.tgt_sr, dev, self.is_half)
+            vc = VC(self.settings.tgt_sr, dev, self.is_half)
            sid = 0
            times = [0, 0, 0]
            f0_up_key = self.settings.tran
@ -255,14 +248,14 @@ class RVC:
        convertSize = data[1]
        vol = data[2]
        print("audio len 02,", len(audio))
-        audio = resampy.resample(audio, self.tgt_sr, 16000)
+        audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
        print("audio len 03,", len(audio))

        if vol < self.settings.silentThreshold:
            return np.zeros(convertSize).astype(np.int16)

        with torch.no_grad():
-            vc = VC(self.tgt_sr, dev, self.is_half)
+            vc = VC(self.settings.tgt_sr, dev, self.is_half)
            sid = 0
            times = [0, 0, 0]
            f0_up_key = self.settings.tran