mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-03-15 04:13:57 +03:00
add sampling rate to rvc
This commit is contained in:
parent
36c6798f14
commit
3fefc528ab
@ -51,6 +51,10 @@
|
|||||||
"options": {
|
"options": {
|
||||||
"showFramework": true
|
"showFramework": true
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "modelSamplingRate",
|
||||||
|
"options": {}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"deviceSetting": [
|
"deviceSetting": [
|
||||||
@ -160,6 +164,10 @@
|
|||||||
{
|
{
|
||||||
"name": "trancateNumThreshold",
|
"name": "trancateNumThreshold",
|
||||||
"options": {}
|
"options": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "rvcQuality",
|
||||||
|
"options": {}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
26
client/demo/dist/index.js
vendored
26
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -51,6 +51,10 @@
|
|||||||
"options": {
|
"options": {
|
||||||
"showFramework": true
|
"showFramework": true
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "modelSamplingRate",
|
||||||
|
"options": {}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"deviceSetting": [
|
"deviceSetting": [
|
||||||
@ -160,6 +164,10 @@
|
|||||||
{
|
{
|
||||||
"name": "trancateNumThreshold",
|
"name": "trancateNumThreshold",
|
||||||
"options": {}
|
"options": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "rvcQuality",
|
||||||
|
"options": {}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -34,6 +34,8 @@ import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_
|
|||||||
import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
|
import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
|
||||||
import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
|
import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
|
||||||
import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
|
import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
|
||||||
|
import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQuality"
|
||||||
|
import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"
|
||||||
|
|
||||||
export const catalog: { [key: string]: (props: any) => JSX.Element } = {}
|
export const catalog: { [key: string]: (props: any) => JSX.Element } = {}
|
||||||
|
|
||||||
@ -61,6 +63,7 @@ const initialize = () => {
|
|||||||
|
|
||||||
addToCatalog("modelUploader", (props: ModelUploaderRowProps) => { return <ModelUploaderRow {...props} /> })
|
addToCatalog("modelUploader", (props: ModelUploaderRowProps) => { return <ModelUploaderRow {...props} /> })
|
||||||
addToCatalog("framework", (props: FrameworkRowProps) => { return <FrameworkRow {...props} /> })
|
addToCatalog("framework", (props: FrameworkRowProps) => { return <FrameworkRow {...props} /> })
|
||||||
|
addToCatalog("modelSamplingRate", (props: ModelSamplingRateRowProps) => { return <ModelSamplingRateRow {...props} /> })
|
||||||
|
|
||||||
|
|
||||||
addToCatalog("audioInput", (props: AudioInputRowProps) => { return <AudioInputRow {...props} /> })
|
addToCatalog("audioInput", (props: AudioInputRowProps) => { return <AudioInputRow {...props} /> })
|
||||||
@ -96,6 +99,8 @@ const initialize = () => {
|
|||||||
addToCatalog("crossFadeEndRate", (props: CrossFadeEndRateRowProps) => { return <CrossFadeEndRateRow {...props} /> })
|
addToCatalog("crossFadeEndRate", (props: CrossFadeEndRateRowProps) => { return <CrossFadeEndRateRow {...props} /> })
|
||||||
addToCatalog("downSamplingMode", (props: DownSamplingModeRowProps) => { return <DownSamplingModeRow {...props} /> })
|
addToCatalog("downSamplingMode", (props: DownSamplingModeRowProps) => { return <DownSamplingModeRow {...props} /> })
|
||||||
addToCatalog("trancateNumThreshold", (props: TrancateNumTresholdRowProps) => { return <TrancateNumTresholdRow {...props} /> })
|
addToCatalog("trancateNumThreshold", (props: TrancateNumTresholdRowProps) => { return <TrancateNumTresholdRow {...props} /> })
|
||||||
|
addToCatalog("rvcQuality", (props: RVCQualityRowProps) => { return <RVCQualityRow {...props} /> })
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,43 @@
|
|||||||
|
import React, { useMemo } from "react"
|
||||||
|
import { fileSelector, ModelSamplingRate } from "@dannadori/voice-changer-client-js"
|
||||||
|
import { useAppState } from "../../../001_provider/001_AppStateProvider"
|
||||||
|
|
||||||
|
/** Props for ModelSamplingRateRow. No options are currently defined. */
export type ModelSamplingRateRowProps = {
}

/**
 * Settings row that renders a drop-down listing every value of
 * `ModelSamplingRate` and writes the chosen value to the server settings
 * under the `modelSamplingRate` key.
 *
 * @param _props - Unused; the row takes no configuration.
 * @returns The memoized row element.
 */
export const ModelSamplingRateRow = (_props: ModelSamplingRateRowProps) => {
    const appState = useAppState()

    const modelSamplingRateRow = useMemo(() => {
        // Merge the new sampling rate into the current settings and push the result.
        const onModelSamplingRateChanged = (val: ModelSamplingRate) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                modelSamplingRate: val
            })
        }

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-2">Model Sampling Rate</div>
                <div className="body-item-text">
                    <div></div>
                </div>
                <div className="body-button-container">
                    <select className="body-select" value={appState.serverSetting.serverSetting.modelSamplingRate} onChange={(e) => {
                        // A <select> always reports its value as a string; convert it to a
                        // number so the setting keeps its numeric type instead of silently
                        // storing e.g. "40000".
                        onModelSamplingRateChanged(Number(e.target.value) as ModelSamplingRate)
                    }}>
                        {
                            Object.values(ModelSamplingRate).map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])

    return modelSamplingRateRow
}
|
@ -0,0 +1,31 @@
|
|||||||
|
import React, { useMemo } from "react"
|
||||||
|
import { useAppState } from "../../../001_provider/001_AppStateProvider"
|
||||||
|
|
||||||
|
/** Props for RVCQualityRow. No options are currently defined. */
export type RVCQualityRowProps = {
}

/**
 * Settings row that renders a low/high drop-down and writes the chosen
 * value (0 for low, 1 for high) to the server settings under the
 * `rvcQuality` key.
 *
 * @param _props - Unused; the row takes no configuration.
 * @returns The memoized row element.
 */
export const RVCQualityRow = (_props: RVCQualityRowProps) => {
    const appState = useAppState()

    const rvcQualityRow = useMemo(() => {
        // Merge the new quality value into the current settings and push the result.
        const onRVCQualityChanged = (val: number) => {
            appState.serverSetting.updateServerSettings({
                ...appState.serverSetting.serverSetting,
                rvcQuality: val
            })
        }
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">RVC Quality</div>
                <div className="body-input-container">
                    <select value={appState.serverSetting.serverSetting.rvcQuality} onChange={(e) => { onRVCQualityChanged(Number(e.target.value)) }}>
                        <option value="0" >low</option>
                        <option value="1" >high</option>
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])

    return rvcQualityRow
}
|
@ -25,6 +25,14 @@ export const InputSampleRate = {
|
|||||||
} as const
|
} as const
|
||||||
export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]
|
export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]
|
||||||
|
|
||||||
|
/** Model sampling rates that can be selected for the `modelSamplingRate` server setting. */
export const ModelSamplingRate = {
    "48000": 48000,
    "40000": 40000,
    "32000": 32000
} as const
/**
 * Union of the values of the `ModelSamplingRate` object (48000 | 40000 | 32000).
 * Derived from `ModelSamplingRate` itself rather than from `InputSampleRate`,
 * so the type matches the values listed above.
 */
export type ModelSamplingRate = typeof ModelSamplingRate[keyof typeof ModelSamplingRate]
|
||||||
|
|
||||||
|
|
||||||
export const CrossFadeOverlapSize = {
|
export const CrossFadeOverlapSize = {
|
||||||
"1024": 1024,
|
"1024": 1024,
|
||||||
"2048": 2048,
|
"2048": 2048,
|
||||||
@ -79,6 +87,9 @@ export const ServerSettingKey = {
|
|||||||
"clusterInferRatio": "clusterInferRatio",
|
"clusterInferRatio": "clusterInferRatio",
|
||||||
|
|
||||||
"indexRatio": "indexRatio",
|
"indexRatio": "indexRatio",
|
||||||
|
"rvcQuality": "rvcQuality",
|
||||||
|
"modelSamplingRate": "modelSamplingRate",
|
||||||
|
|
||||||
|
|
||||||
"inputSampleRate": "inputSampleRate",
|
"inputSampleRate": "inputSampleRate",
|
||||||
} as const
|
} as const
|
||||||
@ -109,6 +120,8 @@ export type VoiceChangerServerSetting = {
|
|||||||
clusterInferRatio: number // so-vits-svc
|
clusterInferRatio: number // so-vits-svc
|
||||||
|
|
||||||
indexRatio: number // RVC
|
indexRatio: number // RVC
|
||||||
|
rvcQuality: number // 0:low, 1:high
|
||||||
|
modelSamplingRate: ModelSamplingRate // 32000,40000,48000
|
||||||
|
|
||||||
inputSampleRate: InputSampleRate
|
inputSampleRate: InputSampleRate
|
||||||
}
|
}
|
||||||
@ -147,6 +160,8 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
|
|||||||
clusterInferRatio: 0,
|
clusterInferRatio: 0,
|
||||||
|
|
||||||
indexRatio: 0,
|
indexRatio: 0,
|
||||||
|
rvcQuality: 0,
|
||||||
|
modelSamplingRate: 48000,
|
||||||
|
|
||||||
inputSampleRate: 24000,
|
inputSampleRate: 24000,
|
||||||
|
|
||||||
@ -181,6 +196,9 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
|
|||||||
clusterInferRatio: 0,
|
clusterInferRatio: 0,
|
||||||
|
|
||||||
indexRatio: 0,
|
indexRatio: 0,
|
||||||
|
rvcQuality: 0,
|
||||||
|
modelSamplingRate: 48000,
|
||||||
|
|
||||||
|
|
||||||
inputSampleRate: 24000,
|
inputSampleRate: 24000,
|
||||||
|
|
||||||
@ -219,6 +237,9 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
|
|||||||
clusterInferRatio: 0.1,
|
clusterInferRatio: 0.1,
|
||||||
|
|
||||||
indexRatio: 0,
|
indexRatio: 0,
|
||||||
|
rvcQuality: 0,
|
||||||
|
modelSamplingRate: 48000,
|
||||||
|
|
||||||
|
|
||||||
inputSampleRate: 24000,
|
inputSampleRate: 24000,
|
||||||
|
|
||||||
@ -257,6 +278,9 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
|
|||||||
clusterInferRatio: 0.1,
|
clusterInferRatio: 0.1,
|
||||||
|
|
||||||
indexRatio: 0,
|
indexRatio: 0,
|
||||||
|
rvcQuality: 0,
|
||||||
|
modelSamplingRate: 48000,
|
||||||
|
|
||||||
|
|
||||||
inputSampleRate: 24000,
|
inputSampleRate: 24000,
|
||||||
|
|
||||||
@ -294,6 +318,9 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
|
|||||||
clusterInferRatio: 0.1,
|
clusterInferRatio: 0.1,
|
||||||
|
|
||||||
indexRatio: 0,
|
indexRatio: 0,
|
||||||
|
rvcQuality: 0,
|
||||||
|
modelSamplingRate: 48000,
|
||||||
|
|
||||||
|
|
||||||
inputSampleRate: 24000,
|
inputSampleRate: 24000,
|
||||||
|
|
||||||
@ -333,6 +360,8 @@ export const DefaultServerSetting_RVC: ServerInfo = {
|
|||||||
clusterInferRatio: 0.1,
|
clusterInferRatio: 0.1,
|
||||||
|
|
||||||
indexRatio: 0,
|
indexRatio: 0,
|
||||||
|
rvcQuality: 0,
|
||||||
|
modelSamplingRate: 48000,
|
||||||
|
|
||||||
inputSampleRate: 24000,
|
inputSampleRate: 24000,
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ from const import HUBERT_ONNX_MODEL_PATH
|
|||||||
import pyworld as pw
|
import pyworld as pw
|
||||||
|
|
||||||
from vc_infer_pipeline import VC
|
from vc_infer_pipeline import VC
|
||||||
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
|
from infer_pack.models import SynthesizerTrnMs256NSFsid
|
||||||
from fairseq import checkpoint_utils
|
from fairseq import checkpoint_utils
|
||||||
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||||
|
|
||||||
@ -52,14 +52,15 @@ class RVCSettings():
|
|||||||
configFile: str = ""
|
configFile: str = ""
|
||||||
|
|
||||||
indexRatio: float = 0
|
indexRatio: float = 0
|
||||||
quality: int = 0
|
rvcQuality: int = 0
|
||||||
|
modelSamplingRate: int = 48000
|
||||||
|
|
||||||
speakers: dict[str, int] = field(
|
speakers: dict[str, int] = field(
|
||||||
default_factory=lambda: {}
|
default_factory=lambda: {}
|
||||||
)
|
)
|
||||||
|
|
||||||
# ↓mutableな物だけ列挙
|
# ↓mutableな物だけ列挙
|
||||||
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "quality"]
|
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "rvcQuality", "modelSamplingRate"]
|
||||||
floatData = ["noiceScale", "silentThreshold", "indexRatio"]
|
floatData = ["noiceScale", "silentThreshold", "indexRatio"]
|
||||||
strData = ["framework", "f0Detector"]
|
strData = ["framework", "f0Detector"]
|
||||||
|
|
||||||
@ -82,7 +83,6 @@ class RVC:
|
|||||||
self.index_file = index_file
|
self.index_file = index_file
|
||||||
self.is_half = is_half
|
self.is_half = is_half
|
||||||
|
|
||||||
self.tgt_sr = 40000
|
|
||||||
try:
|
try:
|
||||||
hubert_path = self.params["hubert"]
|
hubert_path = self.params["hubert"]
|
||||||
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
|
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
|
||||||
@ -103,7 +103,7 @@ class RVC:
|
|||||||
# PyTorchモデル生成
|
# PyTorchモデル生成
|
||||||
if pyTorch_model_file != None:
|
if pyTorch_model_file != None:
|
||||||
cpt = torch.load(pyTorch_model_file, map_location="cpu")
|
cpt = torch.load(pyTorch_model_file, map_location="cpu")
|
||||||
self.tgt_sr = cpt["config"][-1]
|
self.settings.tgt_sr = cpt["config"][-1]
|
||||||
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
|
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
|
||||||
net_g.eval()
|
net_g.eval()
|
||||||
net_g.load_state_dict(cpt["weight"], strict=False)
|
net_g.load_state_dict(cpt["weight"], strict=False)
|
||||||
@ -113,14 +113,7 @@ class RVC:
|
|||||||
|
|
||||||
# ONNXモデル生成
|
# ONNXモデル生成
|
||||||
if onnx_model_file != None:
|
if onnx_model_file != None:
|
||||||
# self.onnx_session = ModelWrapper(onnx_model_file, is_half=True)
|
|
||||||
self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.is_half)
|
self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.is_half)
|
||||||
# input_info = self.onnx_session.get_inputs()
|
|
||||||
# for i in input_info:
|
|
||||||
# print("input", i)
|
|
||||||
# output_info = self.onnx_session.get_outputs()
|
|
||||||
# for i in output_info:
|
|
||||||
# print("output", i)
|
|
||||||
return self.get_info()
|
return self.get_info()
|
||||||
|
|
||||||
def update_setteings(self, key: str, val: any):
|
def update_setteings(self, key: str, val: any):
|
||||||
@ -170,7 +163,7 @@ class RVC:
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def get_processing_sampling_rate(self):
|
def get_processing_sampling_rate(self):
|
||||||
return self.tgt_sr
|
return self.settings.tgt_sr
|
||||||
# return 24000
|
# return 24000
|
||||||
|
|
||||||
def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
|
def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
|
||||||
@ -215,13 +208,13 @@ class RVC:
|
|||||||
convertSize = data[1]
|
convertSize = data[1]
|
||||||
vol = data[2]
|
vol = data[2]
|
||||||
|
|
||||||
audio = resampy.resample(audio, self.tgt_sr, 16000)
|
audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
|
||||||
|
|
||||||
if vol < self.settings.silentThreshold:
|
if vol < self.settings.silentThreshold:
|
||||||
return np.zeros(convertSize).astype(np.int16)
|
return np.zeros(convertSize).astype(np.int16)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
vc = VC(self.tgt_sr, dev, self.is_half)
|
vc = VC(self.settings.tgt_sr, dev, self.is_half)
|
||||||
sid = 0
|
sid = 0
|
||||||
times = [0, 0, 0]
|
times = [0, 0, 0]
|
||||||
f0_up_key = self.settings.tran
|
f0_up_key = self.settings.tran
|
||||||
@ -255,14 +248,14 @@ class RVC:
|
|||||||
convertSize = data[1]
|
convertSize = data[1]
|
||||||
vol = data[2]
|
vol = data[2]
|
||||||
print("audio len 02,", len(audio))
|
print("audio len 02,", len(audio))
|
||||||
audio = resampy.resample(audio, self.tgt_sr, 16000)
|
audio = resampy.resample(audio, self.settings.tgt_sr, 16000)
|
||||||
print("audio len 03,", len(audio))
|
print("audio len 03,", len(audio))
|
||||||
|
|
||||||
if vol < self.settings.silentThreshold:
|
if vol < self.settings.silentThreshold:
|
||||||
return np.zeros(convertSize).astype(np.int16)
|
return np.zeros(convertSize).astype(np.int16)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
vc = VC(self.tgt_sr, dev, self.is_half)
|
vc = VC(self.settings.tgt_sr, dev, self.is_half)
|
||||||
sid = 0
|
sid = 0
|
||||||
times = [0, 0, 0]
|
times = [0, 0, 0]
|
||||||
f0_up_key = self.settings.tran
|
f0_up_key = self.settings.tran
|
||||||
|
Loading…
x
Reference in New Issue
Block a user