add sampling rate to rvc

This commit is contained in:
wataru 2023-04-08 04:39:04 +09:00
parent 36c6798f14
commit 3fefc528ab
8 changed files with 158 additions and 19 deletions

View File

@ -51,6 +51,10 @@
"options": {
"showFramework": true
}
},
{
"name": "modelSamplingRate",
"options": {}
}
],
"deviceSetting": [
@ -160,6 +164,10 @@
{
"name": "trancateNumThreshold",
"options": {}
},
{
"name": "rvcQuality",
"options": {}
}
]
},

File diff suppressed because one or more lines are too long

View File

@ -51,6 +51,10 @@
"options": {
"showFramework": true
}
},
{
"name": "modelSamplingRate",
"options": {}
}
],
"deviceSetting": [
@ -160,6 +164,10 @@
{
"name": "trancateNumThreshold",
"options": {}
},
{
"name": "rvcQuality",
"options": {}
}
]
},

View File

@ -34,6 +34,8 @@ import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_
import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQuality"
import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"
export const catalog: { [key: string]: (props: any) => JSX.Element } = {}
@ -61,6 +63,7 @@ const initialize = () => {
addToCatalog("modelUploader", (props: ModelUploaderRowProps) => { return <ModelUploaderRow {...props} /> })
addToCatalog("framework", (props: FrameworkRowProps) => { return <FrameworkRow {...props} /> })
addToCatalog("modelSamplingRate", (props: ModelSamplingRateRowProps) => { return <ModelSamplingRateRow {...props} /> })
addToCatalog("audioInput", (props: AudioInputRowProps) => { return <AudioInputRow {...props} /> })
@ -96,6 +99,8 @@ const initialize = () => {
addToCatalog("crossFadeEndRate", (props: CrossFadeEndRateRowProps) => { return <CrossFadeEndRateRow {...props} /> })
addToCatalog("downSamplingMode", (props: DownSamplingModeRowProps) => { return <DownSamplingModeRow {...props} /> })
addToCatalog("trancateNumThreshold", (props: TrancateNumTresholdRowProps) => { return <TrancateNumTresholdRow {...props} /> })
addToCatalog("rvcQuality", (props: RVCQualityRowProps) => { return <RVCQualityRow {...props} /> })
}

View File

@ -0,0 +1,43 @@
import React, { useMemo } from "react"
import { fileSelector, ModelSamplingRate } from "@dannadori/voice-changer-client-js"
import { useAppState } from "../../../001_provider/001_AppStateProvider"
/** Props for the model sampling rate row; the row currently takes no configuration. */
export type ModelSamplingRateRowProps = {
}

/**
 * Settings row with a drop-down listing every value of `ModelSamplingRate`.
 *
 * Selecting an entry sends the whole current server setting back through
 * `updateServerSettings`, with `modelSamplingRate` replaced by the chosen value.
 * The rendered element is memoized and rebuilt only when the server setting
 * (or the updater function) changes.
 */
export const ModelSamplingRateRow = (_props: ModelSamplingRateRowProps) => {
    const appState = useAppState()

    const modelSamplingRateRow = useMemo(() => {
        // Push the new sampling rate together with the rest of the current setting.
        const onModelSamplingRateChanged = (val: ModelSamplingRate) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                modelSamplingRate: val
            })
        }

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-2">Model Sampling Rate</div>
                <div className="body-item-text">
                    <div></div>
                </div>
                <div className="body-button-container">
                    <select className="body-select" value={appState.serverSetting.serverSetting.modelSamplingRate} onChange={(e) => {
                        // A <select> always reports its value as a string; convert it so the
                        // setting keeps the numeric type declared by ModelSamplingRate.
                        onModelSamplingRateChanged(Number(e.target.value) as ModelSamplingRate)
                    }}>
                        {
                            Object.values(ModelSamplingRate).map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])

    return modelSamplingRateRow
}

View File

@ -0,0 +1,31 @@
import React, { useMemo } from "react"
import { useAppState } from "../../../001_provider/001_AppStateProvider"
/** Props for the RVC quality row; the row currently takes no configuration. */
export type RVCQualityRowProps = {
}

/**
 * Settings row with a drop-down for the RVC quality level.
 *
 * The two options map to the numeric setting values 0 ("low") and 1 ("high").
 * Selecting one sends the whole current server setting back through
 * `updateServerSettings`, with `rvcQuality` replaced by the chosen number.
 * The rendered element is memoized and rebuilt only when the server setting
 * (or the updater function) changes.
 */
export const RVCQualityRow = (_props: RVCQualityRowProps) => {
    const appState = useAppState()

    const rvcQualityRow = useMemo(() => {
        // Push the new quality level together with the rest of the current setting.
        const onRVCQualityChanged = (val: number) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                rvcQuality: val
            })
        }

        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">RVC Quality</div>
                <div className="body-input-container">
                    {/* The select reports strings, so convert to a number before storing. */}
                    <select value={appState.serverSetting.serverSetting.rvcQuality} onChange={(e) => { onRVCQualityChanged(Number(e.target.value)) }}>
                        <option value="0" >low</option>
                        <option value="1" >high</option>
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])

    return rvcQualityRow
}

View File

@ -25,6 +25,14 @@ export const InputSampleRate = {
} as const
export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]
/**
 * Sampling rates (Hz) selectable for the model: 48000, 40000 or 32000.
 * Keys are the decimal string form of each value.
 */
export const ModelSamplingRate = {
    "48000": 48000,
    "40000": 40000,
    "32000": 32000
} as const
/**
 * Union of the numeric values of the `ModelSamplingRate` object.
 * Derived from `ModelSamplingRate` itself so that only its own values are accepted.
 */
export type ModelSamplingRate = typeof ModelSamplingRate[keyof typeof ModelSamplingRate]
export const CrossFadeOverlapSize = {
"1024": 1024,
"2048": 2048,
@ -79,6 +87,9 @@ export const ServerSettingKey = {
"clusterInferRatio": "clusterInferRatio",
"indexRatio": "indexRatio",
"rvcQuality": "rvcQuality",
"modelSamplingRate": "modelSamplingRate",
"inputSampleRate": "inputSampleRate",
} as const
@ -109,6 +120,8 @@ export type VoiceChangerServerSetting = {
clusterInferRatio: number // so-vits-svc
indexRatio: number // RVC
rvcQuality: number // 0:low, 1:high
modelSamplingRate: ModelSamplingRate // 32000,40000,48000
inputSampleRate: InputSampleRate
}
@ -147,6 +160,8 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
clusterInferRatio: 0,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
inputSampleRate: 24000,
@ -181,6 +196,9 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
clusterInferRatio: 0,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
inputSampleRate: 24000,
@ -219,6 +237,9 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
inputSampleRate: 24000,
@ -257,6 +278,9 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
inputSampleRate: 24000,
@ -294,6 +318,9 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
inputSampleRate: 24000,
@ -333,6 +360,8 @@ export const DefaultServerSetting_RVC: ServerInfo = {
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
inputSampleRate: 24000,

View File

@ -28,7 +28,7 @@ from const import HUBERT_ONNX_MODEL_PATH
import pyworld as pw
from vc_infer_pipeline import VC
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
from infer_pack.models import SynthesizerTrnMs256NSFsid
from fairseq import checkpoint_utils
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@ -52,14 +52,15 @@ class RVCSettings():
configFile: str = ""
indexRatio: float = 0
quality: int = 0
rvcQuality: int = 0
modelSamplingRate: int = 48000
speakers: dict[str, int] = field(
default_factory=lambda: {}
)
# ↓mutableな物だけ列挙
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "quality"]
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "rvcQuality", "modelSamplingRate"]
floatData = ["noiceScale", "silentThreshold", "indexRatio"]
strData = ["framework", "f0Detector"]
@ -82,7 +83,6 @@ class RVC:
self.index_file = index_file
self.is_half = is_half
self.tgt_sr = 40000
try:
hubert_path = self.params["hubert"]
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
@ -103,7 +103,7 @@ class RVC:
# PyTorchモデル生成
if pyTorch_model_file != None:
cpt = torch.load(pyTorch_model_file, map_location="cpu")
self.tgt_sr = cpt["config"][-1]
self.settings.tgt_sr = cpt["config"][-1]
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
net_g.eval()
net_g.load_state_dict(cpt["weight"], strict=False)
@ -113,14 +113,7 @@ class RVC:
# ONNXモデル生成
if onnx_model_file != None:
# self.onnx_session = ModelWrapper(onnx_model_file, is_half=True)
self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.is_half)
# input_info = self.onnx_session.get_inputs()
# for i in input_info:
# print("input", i)
# output_info = self.onnx_session.get_outputs()
# for i in output_info:
# print("output", i)
return self.get_info()
def update_setteings(self, key: str, val: any):
@ -170,7 +163,7 @@ class RVC:
return data
def get_processing_sampling_rate(self):
return self.tgt_sr
return self.settings.tgt_sr
# return 24000
def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
@ -215,13 +208,13 @@ class RVC:
convertSize = data[1]
vol = data[2]
audio = resampy.resample(audio, self.tgt_sr, 16000)
audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16)
with torch.no_grad():
vc = VC(self.tgt_sr, dev, self.is_half)
vc = VC(self.settings.tgt_sr, dev, self.is_half)
sid = 0
times = [0, 0, 0]
f0_up_key = self.settings.tran
@ -255,14 +248,14 @@ class RVC:
convertSize = data[1]
vol = data[2]
print("audio len 02,", len(audio))
audio = resampy.resample(audio, self.tgt_sr, 16000)
audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
print("audio len 03,", len(audio))
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16)
with torch.no_grad():
vc = VC(self.tgt_sr, dev, self.is_half)
vc = VC(self.settings.tgt_sr, dev, self.is_half)
sid = 0
times = [0, 0, 0]
f0_up_key = self.settings.tran