add sampling rate to rvc

2025-03-15 04:13:57 +03:00 · 2023-04-08 04:39:04 +09:00 · 2023-04-08 04:39:04 +09:00 · 3fefc528ab
commit 3fefc528ab
parent 36c6798f14
8 changed files with 158 additions and 19 deletions
--- a/client/demo/dist/assets/gui_settings/RVC.json
+++ b/client/demo/dist/assets/gui_settings/RVC.json
@ -51,6 +51,10 @@
                "options": {
                    "showFramework": true
                }
            },
            {
                "name": "modelSamplingRate",
                "options": {}
            }
        ],
        "deviceSetting": [
@ -160,6 +164,10 @@
            {
                "name": "trancateNumThreshold",
                "options": {}
            },
            {
                "name": "rvcQuality",
                "options": {}
            }
        ]
    },
--- a/client/demo/dist/index.js
+++ b/client/demo/dist/index.js
--- a/client/demo/public/assets/gui_settings/RVC.json
+++ b/client/demo/public/assets/gui_settings/RVC.json
@ -51,6 +51,10 @@
                "options": {
                    "showFramework": true
                }
            },
            {
                "name": "modelSamplingRate",
                "options": {}
            }
        ],
        "deviceSetting": [
@ -160,6 +164,10 @@
            {
                "name": "trancateNumThreshold",
                "options": {}
            },
            {
                "name": "rvcQuality",
                "options": {}
            }
        ]
    },
--- a/client/demo/src/components/demo/002_ComponentGenerator.tsx
+++ b/client/demo/src/components/demo/002_ComponentGenerator.tsx
@ -34,6 +34,8 @@ import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_
 import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
 import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
 import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
 import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQuality"
 import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"
 export const catalog: { [key: string]: (props: any) => JSX.Element } = {}
@ -61,6 +63,7 @@ const initialize = () => {
    addToCatalog("modelUploader", (props: ModelUploaderRowProps) => { return <ModelUploaderRow {...props} /> })
    addToCatalog("framework", (props: FrameworkRowProps) => { return <FrameworkRow {...props} /> })
    addToCatalog("modelSamplingRate", (props: ModelSamplingRateRowProps) => { return <ModelSamplingRateRow {...props} /> })
    addToCatalog("audioInput", (props: AudioInputRowProps) => { return <AudioInputRow {...props} /> })
@ -96,6 +99,8 @@ const initialize = () => {
    addToCatalog("crossFadeEndRate", (props: CrossFadeEndRateRowProps) => { return <CrossFadeEndRateRow {...props} /> })
    addToCatalog("downSamplingMode", (props: DownSamplingModeRowProps) => { return <DownSamplingModeRow {...props} /> })
    addToCatalog("trancateNumThreshold", (props: TrancateNumTresholdRowProps) => { return <TrancateNumTresholdRow {...props} /> })
    addToCatalog("rvcQuality", (props: RVCQualityRowProps) => { return <RVCQualityRow {...props} /> })
 }
--- a/client/demo/src/components/demo/components/303_ModelSamplingRateRow.tsx
+++ b/client/demo/src/components/demo/components/303_ModelSamplingRateRow.tsx
@ -0,0 +1,43 @@
 import React, { useMemo } from "react"
 import { fileSelector, ModelSamplingRate } from "@dannadori/voice-changer-client-js"
 import { useAppState } from "../../../001_provider/001_AppStateProvider"
 /** Props accepted by ModelSamplingRateRow; the type currently declares no fields. */
 export type ModelSamplingRateRowProps = {
 }
 /**
  * Settings row with a drop-down for the model sampling rate.
  *
  * The drop-down lists every value of `ModelSamplingRate` and shows the
  * `modelSamplingRate` currently held in the server settings. Choosing an
  * entry calls `updateServerSettings` with a copy of the current settings in
  * which `modelSamplingRate` is replaced by the chosen value.
  *
  * @param _props - unused; the row takes no configuration.
  * @returns the memoized row element.
  */
 export const ModelSamplingRateRow = (_props: ModelSamplingRateRowProps) => {
    const appState = useAppState()
    const modelSamplingRateRow = useMemo(() => {
        const onModelSamplingRateChanged = (val: ModelSamplingRate) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                modelSamplingRate: val
            })
        }
        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-2">Model Sampling Rate</div>
                <div className="body-item-text">
                    <div></div>
                </div>
                <div className="body-button-container">
                    <select className="body-select" value={appState.serverSetting.serverSetting.modelSamplingRate} onChange={(e) => {
                        // A <select> reports its value as a string; convert it so the
                        // stored setting is a number, as the ModelSamplingRate type declares.
                        onModelSamplingRateChanged(Number(e.target.value) as ModelSamplingRate)
                    }}>
                        {
                            Object.values(ModelSamplingRate).map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])
    return modelSamplingRateRow
 }
--- a/client/demo/src/components/demo/components/810_RVCQuality.tsx
+++ b/client/demo/src/components/demo/components/810_RVCQuality.tsx
@ -0,0 +1,31 @@
 import React, { useMemo } from "react"
 import { useAppState } from "../../../001_provider/001_AppStateProvider"
 /** Props accepted by RVCQualityRow; the type currently declares no fields. */
 export type RVCQualityRowProps = {
 }
 /**
  * Settings row with a drop-down for the RVC quality level.
  *
  * The drop-down offers "low" (value 0) and "high" (value 1) and shows the
  * `rvcQuality` currently held in the server settings. Choosing an entry calls
  * `updateServerSettings` with a copy of the current settings in which
  * `rvcQuality` is replaced by the chosen numeric value.
  *
  * @param _props - unused; the row takes no configuration.
  * @returns the memoized row element.
  */
 export const RVCQualityRow = (_props: RVCQualityRowProps) => {
    const appState = useAppState()
    const rvcQualityRow = useMemo(() => {
        const onRVCQualityChanged = (val: number) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                rvcQuality: val
            })
        }
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">RVC Quality</div>
                <div className="body-input-container">
                    {/* The option value arrives as a string, so it is converted to a number before storing. */}
                    <select value={appState.serverSetting.serverSetting.rvcQuality} onChange={(e) => { onRVCQualityChanged(Number(e.target.value)) }}>
                        <option value="0" >low</option>
                        <option value="1" >high</option>
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])
    return rvcQualityRow
 }
--- a/client/lib/src/const.ts
+++ b/client/lib/src/const.ts
@ -25,6 +25,14 @@ export const InputSampleRate = {
 } as const
 export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]
 /** Sampling rates that can be selected for the model; each key is the string form of its value. */
 export const ModelSamplingRate = {
    "48000": 48000,
    "40000": 40000,
    "32000": 32000
 } as const
 /**
  * Union of the values of the `ModelSamplingRate` object (48000 | 40000 | 32000).
  * Derived from `ModelSamplingRate` itself so the type always matches the listed values.
  */
 export type ModelSamplingRate = typeof ModelSamplingRate[keyof typeof ModelSamplingRate]
 export const CrossFadeOverlapSize = {
    "1024": 1024,
    "2048": 2048,
@ -79,6 +87,9 @@ export const ServerSettingKey = {
    "clusterInferRatio": "clusterInferRatio",
    "indexRatio": "indexRatio",
    "rvcQuality": "rvcQuality",
    "modelSamplingRate": "modelSamplingRate",
    "inputSampleRate": "inputSampleRate",
 } as const
@ -109,6 +120,8 @@ export type VoiceChangerServerSetting = {
    clusterInferRatio: number // so-vits-svc
    indexRatio: number // RVC
    rvcQuality: number // 0:low, 1:high
    modelSamplingRate: ModelSamplingRate // 32000,40000,48000
    inputSampleRate: InputSampleRate
 }
@ -147,6 +160,8 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
    clusterInferRatio: 0,
    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    inputSampleRate: 24000,
@ -181,6 +196,9 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
    clusterInferRatio: 0,
    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    inputSampleRate: 24000,
@ -219,6 +237,9 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
    clusterInferRatio: 0.1,
    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    inputSampleRate: 24000,
@ -257,6 +278,9 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
    clusterInferRatio: 0.1,
    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    inputSampleRate: 24000,
@ -294,6 +318,9 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
    clusterInferRatio: 0.1,
    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    inputSampleRate: 24000,
@ -333,6 +360,8 @@ export const DefaultServerSetting_RVC: ServerInfo = {
    clusterInferRatio: 0.1,
    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    inputSampleRate: 24000,
--- a/server/voice_changer/RVC/RVC.py
+++ b/server/voice_changer/RVC/RVC.py
@ -28,7 +28,7 @@ from const import HUBERT_ONNX_MODEL_PATH
 import pyworld as pw
 from vc_infer_pipeline import VC
-from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
+from infer_pack.models import SynthesizerTrnMs256NSFsid
 from fairseq import checkpoint_utils
 providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -52,14 +52,15 @@ class RVCSettings():
    configFile: str = ""
    indexRatio: float = 0
-    quality: int = 0
+    rvcQuality: int = 0
    modelSamplingRate: int = 48000
    speakers: dict[str, int] = field(
        default_factory=lambda: {}
    )
    # ↓mutableな物だけ列挙
-    intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "quality"]
+    intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "rvcQuality", "modelSamplingRate"]
    floatData = ["noiceScale", "silentThreshold", "indexRatio"]
    strData = ["framework", "f0Detector"]
@ -82,7 +83,6 @@ class RVC:
        self.index_file = index_file
        self.is_half = is_half
        self.tgt_sr = 40000
        try:
            hubert_path = self.params["hubert"]
            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
@ -103,7 +103,7 @@ class RVC:
        # PyTorchモデル生成
        if pyTorch_model_file != None:
            cpt = torch.load(pyTorch_model_file, map_location="cpu")
-            self.tgt_sr = cpt["config"][-1]
+            self.settings.tgt_sr = cpt["config"][-1]
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
            net_g.eval()
            net_g.load_state_dict(cpt["weight"], strict=False)
@ -113,14 +113,7 @@ class RVC:
        # ONNXモデル生成
        if onnx_model_file != None:
            # self.onnx_session = ModelWrapper(onnx_model_file, is_half=True)
            self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.is_half)
            # input_info = self.onnx_session.get_inputs()
            # for i in input_info:
            #     print("input", i)
            # output_info = self.onnx_session.get_outputs()
            # for i in output_info:
            #     print("output", i)
        return self.get_info()
    def update_setteings(self, key: str, val: any):
@ -170,7 +163,7 @@ class RVC:
        return data
    def get_processing_sampling_rate(self):
-        return self.tgt_sr
+        return self.settings.tgt_sr
        # return 24000
    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
@ -215,13 +208,13 @@ class RVC:
        convertSize = data[1]
        vol = data[2]
-        audio = resampy.resample(audio, self.tgt_sr, 16000)
+        audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
        if vol < self.settings.silentThreshold:
            return np.zeros(convertSize).astype(np.int16)
        with torch.no_grad():
-            vc = VC(self.tgt_sr, dev, self.is_half)
+            vc = VC(self.settings.tgt_sr, dev, self.is_half)
            sid = 0
            times = [0, 0, 0]
            f0_up_key = self.settings.tran
@ -255,14 +248,14 @@ class RVC:
        convertSize = data[1]
        vol = data[2]
        print("audio len 02,", len(audio))
-        audio = resampy.resample(audio, self.tgt_sr, 16000)
+        audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
        print("audio len 03,", len(audio))
        if vol < self.settings.silentThreshold:
            return np.zeros(convertSize).astype(np.int16)
        with torch.no_grad():
-            vc = VC(self.tgt_sr, dev, self.is_half)
+            vc = VC(self.settings.tgt_sr, dev, self.is_half)
            sid = 0
            times = [0, 0, 0]
            f0_up_key = self.settings.tran