Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-01-23 13:35:12 +03:00)

Commit: add sampling rate to rvc

This commit is contained in:
    parent 36c6798f14
    commit 3fefc528ab
@@ -51,6 +51,10 @@
        "options": {
            "showFramework": true
        }
    },
    {
        "name": "modelSamplingRate",
        "options": {}
    }
],
"deviceSetting": [
@@ -160,6 +164,10 @@
    {
        "name": "trancateNumThreshold",
        "options": {}
    },
    {
        "name": "rvcQuality",
        "options": {}
    }
]
},
26  client/demo/dist/index.js  (vendored)
File diff suppressed because one or more lines are too long
@@ -51,6 +51,10 @@
        "options": {
            "showFramework": true
        }
    },
    {
        "name": "modelSamplingRate",
        "options": {}
    }
],
"deviceSetting": [
@@ -160,6 +164,10 @@
    {
        "name": "trancateNumThreshold",
        "options": {}
    },
    {
        "name": "rvcQuality",
        "options": {}
    }
]
},
@@ -34,6 +34,8 @@ import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_
import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQuality"
import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"

export const catalog: { [key: string]: (props: any) => JSX.Element } = {}

@@ -61,6 +63,7 @@ const initialize = () => {

    addToCatalog("modelUploader", (props: ModelUploaderRowProps) => { return <ModelUploaderRow {...props} /> })
    addToCatalog("framework", (props: FrameworkRowProps) => { return <FrameworkRow {...props} /> })
    addToCatalog("modelSamplingRate", (props: ModelSamplingRateRowProps) => { return <ModelSamplingRateRow {...props} /> })


    addToCatalog("audioInput", (props: AudioInputRowProps) => { return <AudioInputRow {...props} /> })
@@ -96,6 +99,8 @@ const initialize = () => {
    addToCatalog("crossFadeEndRate", (props: CrossFadeEndRateRowProps) => { return <CrossFadeEndRateRow {...props} /> })
    addToCatalog("downSamplingMode", (props: DownSamplingModeRowProps) => { return <DownSamplingModeRow {...props} /> })
    addToCatalog("trancateNumThreshold", (props: TrancateNumTresholdRowProps) => { return <TrancateNumTresholdRow {...props} /> })
    addToCatalog("rvcQuality", (props: RVCQualityRowProps) => { return <RVCQualityRow {...props} /> })



}
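Note (not part of this commit): the setting JSON files above list screen rows by "name", and initialize() registers a renderer for each name via addToCatalog. A minimal sketch of how such a screen definition could be resolved against the catalog; the RowSetting shape, the renderRows helper, and the import path are assumptions for illustration only:

    // Hypothetical sketch: render each row named in the setting JSON through the catalog.
    import { catalog } from "./catalog" // assumed path to the module shown in this diff

    type RowSetting = { name: string, options: Record<string, unknown> }

    const renderRows = (rows: RowSetting[]) => {
        return rows.map((row) => {
            const render = catalog[row.name]
            if (!render) {
                console.warn(`no catalog entry registered for ${row.name}`)
                return null
            }
            // the row's options from the JSON (e.g. { showFramework: true }) are passed through as props
            return render(row.options)
        })
    }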
@@ -0,0 +1,43 @@
import React, { useMemo } from "react"
import { fileSelector, ModelSamplingRate } from "@dannadori/voice-changer-client-js"
import { useAppState } from "../../../001_provider/001_AppStateProvider"

export type ModelSamplingRateRowProps = {
}

export const ModelSamplingRateRow = (_props: ModelSamplingRateRowProps) => {
    const appState = useAppState()

    const modelSamplingRateRow = useMemo(() => {
        const onModelSamplingRateChanged = (val: ModelSamplingRate) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                modelSamplingRate: val
            })
        }

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-2">Model Sampling Rate</div>
                <div className="body-item-text">
                    <div></div>
                </div>
                <div className="body-button-container">
                    <select className="body-select" value={appState.serverSetting.serverSetting.modelSamplingRate} onChange={(e) => {
                        onModelSamplingRateChanged(e.target.value as unknown as ModelSamplingRate)
                    }}>
                        {
                            Object.values(ModelSamplingRate).map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>


                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])

    return modelSamplingRateRow
}
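Note (not part of this commit): HTMLSelectElement.value is always a string, so the "as unknown as ModelSamplingRate" cast in the onChange handler above stores a string such as "48000" at runtime even though the declared type is a numeric union. A small sketch of a numeric-safe conversion, assuming the ModelSamplingRate values 32000/40000/48000 defined in const.ts:

    // Hypothetical helper: convert the <select> string back into the numeric union.
    const toModelSamplingRate = (raw: string): 32000 | 40000 | 48000 | undefined => {
        const n = Number(raw)
        if (n === 32000 || n === 40000 || n === 48000) {
            return n
        }
        return undefined
    }

    // usage inside the onChange handler:
    //   const val = toModelSamplingRate(e.target.value)
    //   if (val !== undefined) { onModelSamplingRateChanged(val) }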
@@ -0,0 +1,31 @@
import React, { useMemo } from "react"
import { useAppState } from "../../../001_provider/001_AppStateProvider"

export type RVCQualityRowProps = {
}

export const RVCQualityRow = (_props: RVCQualityRowProps) => {
    const appState = useAppState()

    const trancateNumTresholdRow = useMemo(() => {
        const onRVCQualityChanged = (val: number) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                rvcQuality: val
            })
        }
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">RCV Quality</div>
                <div className="body-input-container">
                    <select value={appState.serverSetting.serverSetting.rvcQuality} onChange={(e) => { onRVCQualityChanged(Number(e.target.value)) }}>
                        <option value="0" >low</option>
                        <option value="1" >high</option>
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])

    return trancateNumTresholdRow
}
@@ -25,6 +25,14 @@ export const InputSampleRate = {
} as const
export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]

export const ModelSamplingRate = {
    "48000": 48000,
    "40000": 40000,
    "32000": 32000
} as const
export type ModelSamplingRate = typeof InputSampleRate[keyof typeof InputSampleRate]


export const CrossFadeOverlapSize = {
    "1024": 1024,
    "2048": 2048,
@@ -79,6 +87,9 @@ export const ServerSettingKey = {
    "clusterInferRatio": "clusterInferRatio",

    "indexRatio": "indexRatio",
    "rvcQuality": "rvcQuality",
    "modelSamplingRate": "modelSamplingRate",


    "inputSampleRate": "inputSampleRate",
} as const
@@ -109,6 +120,8 @@ export type VoiceChangerServerSetting = {
    clusterInferRatio: number // so-vits-svc

    indexRatio: number // RVC
    rvcQuality: number // 0:low, 1:high
    modelSamplingRate: ModelSamplingRate // 32000,40000,48000

    inputSampleRate: InputSampleRate
}
@@ -147,6 +160,8 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
    clusterInferRatio: 0,

    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,

    inputSampleRate: 24000,

@@ -181,6 +196,9 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
    clusterInferRatio: 0,

    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,


    inputSampleRate: 24000,

@@ -219,6 +237,9 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,


    inputSampleRate: 24000,

@@ -257,6 +278,9 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,


    inputSampleRate: 24000,

@@ -294,6 +318,9 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,


    inputSampleRate: 24000,

@@ -333,6 +360,8 @@ export const DefaultServerSetting_RVC: ServerInfo = {
    clusterInferRatio: 0.1,

    indexRatio: 0,
    rvcQuality: 0,
    modelSamplingRate: 48000,

    inputSampleRate: 24000,

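Note (not part of this commit): throughout const.ts the "as const" object plus typeof X[keyof typeof X] idiom yields both a runtime lookup table and a literal union type. The new ModelSamplingRate type alias above is declared over InputSampleRate; assuming the intent was to mirror the neighbouring definitions, the usual form of the pattern looks like this:

    // Sketch only: the conventional pairing of the const object and its literal-union type.
    export const ModelSamplingRate = {
        "48000": 48000,
        "40000": 40000,
        "32000": 32000
    } as const
    export type ModelSamplingRate = typeof ModelSamplingRate[keyof typeof ModelSamplingRate]
    // i.e. the type is the literal union 48000 | 40000 | 32000

    // usage: a value typed as ModelSamplingRate is restricted to the three supported rates
    const sampleRate: ModelSamplingRate = 40000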
@@ -28,7 +28,7 @@ from const import HUBERT_ONNX_MODEL_PATH
import pyworld as pw

from vc_infer_pipeline import VC
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
from infer_pack.models import SynthesizerTrnMs256NSFsid
from fairseq import checkpoint_utils
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]

@@ -52,14 +52,15 @@ class RVCSettings():
    configFile: str = ""

    indexRatio: float = 0
    quality: int = 0
    rvcQuality: int = 0
    modelSamplingRate: int = 48000

    speakers: dict[str, int] = field(
        default_factory=lambda: {}
    )

    # ↓ list only the mutable fields
    intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "quality"]
    intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "rvcQuality", "modelSamplingRate"]
    floatData = ["noiceScale", "silentThreshold", "indexRatio"]
    strData = ["framework", "f0Detector"]

@@ -82,7 +83,6 @@ class RVC:
        self.index_file = index_file
        self.is_half = is_half

        self.tgt_sr = 40000
        try:
            hubert_path = self.params["hubert"]
            models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
@@ -103,7 +103,7 @@ class RVC:
        # create the PyTorch model
        if pyTorch_model_file != None:
            cpt = torch.load(pyTorch_model_file, map_location="cpu")
            self.tgt_sr = cpt["config"][-1]
            self.settings.tgt_sr = cpt["config"][-1]
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
            net_g.eval()
            net_g.load_state_dict(cpt["weight"], strict=False)
@@ -113,14 +113,7 @@ class RVC:

        # create the ONNX model
        if onnx_model_file != None:
            # self.onnx_session = ModelWrapper(onnx_model_file, is_half=True)
            self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.is_half)
            # input_info = self.onnx_session.get_inputs()
            # for i in input_info:
            #     print("input", i)
            # output_info = self.onnx_session.get_outputs()
            # for i in output_info:
            #     print("output", i)
        return self.get_info()

    def update_setteings(self, key: str, val: any):
@@ -170,7 +163,7 @@ class RVC:
        return data

    def get_processing_sampling_rate(self):
        return self.tgt_sr
        return self.settings.tgt_sr
        # return 24000

    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
@@ -215,13 +208,13 @@ class RVC:
        convertSize = data[1]
        vol = data[2]

        audio = resampy.resample(audio, self.tgt_sr, 16000)
        audio = resampy.resample(audio, self.settings.tgt_sr, 16000)

        if vol < self.settings.silentThreshold:
            return np.zeros(convertSize).astype(np.int16)

        with torch.no_grad():
            vc = VC(self.tgt_sr, dev, self.is_half)
            vc = VC(self.settings.tgt_sr, dev, self.is_half)
            sid = 0
            times = [0, 0, 0]
            f0_up_key = self.settings.tran
@@ -255,14 +248,14 @@ class RVC:
        convertSize = data[1]
        vol = data[2]
        print("audio len 02,", len(audio))
        audio = resampy.resample(audio, self.tgt_sr, 16000)
        audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
        print("audio len 03,", len(audio))

        if vol < self.settings.silentThreshold:
            return np.zeros(convertSize).astype(np.int16)

        with torch.no_grad():
            vc = VC(self.tgt_sr, dev, self.is_half)
            vc = VC(self.settings.tgt_sr, dev, self.is_half)
            sid = 0
            times = [0, 0, 0]
            f0_up_key = self.settings.tran
