sending sample rate

This commit is contained in:
wataru 2023-04-20 17:17:43 +09:00
parent 2706810b4e
commit e073eed95c
12 changed files with 58 additions and 382 deletions

File diff suppressed because one or more lines are too long

View File

@ -37,7 +37,6 @@ import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRo
import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQualityRow"
import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"
// import { OnnxExportRow, OnnxExportRowProps } from "./components/304_OnnxExportRow"
import { SolaEnableRow, SolaEnableRowProps } from "./components/811_SolaEnableRow"
import { EnableEnhancerRow, EnableEnhancerRowProps } from "./components/610_EnableEnhancerRow"
import { DstIdRow2, DstIdRow2Props } from "./components/602v2_DstIdRow2"
import { SilenceFrontRow, SilenceFrontRowProps } from "./components/812_SilenceFrontRow"
@ -107,7 +106,6 @@ const initialize = () => {
addToCatalog("downSamplingMode", (props: DownSamplingModeRowProps) => { return <DownSamplingModeRow {...props} /> })
addToCatalog("trancateNumThreshold", (props: TrancateNumTresholdRowProps) => { return <TrancateNumTresholdRow {...props} /> })
addToCatalog("rvcQuality", (props: RVCQualityRowProps) => { return <RVCQualityRow {...props} /> })
addToCatalog("solaEnable", (props: SolaEnableRowProps) => { return <SolaEnableRow {...props} /> })
addToCatalog("silenceFront", (props: SilenceFrontRowProps) => { return <SilenceFrontRow {...props} /> })

View File

@ -12,10 +12,10 @@ export const NoiseScaleRow = (_props: NoiseScaleRowProps) => {
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Noise Scale</div>
<div>
<input type="range" className="body-item-input-slider" min="0" max="1" step="0.1" value={appState.serverSetting.serverSetting.noiceScale || 0} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, noiceScale: Number(e.target.value) })
<input type="range" className="body-item-input-slider" min="0" max="1" step="0.1" value={appState.serverSetting.serverSetting.noiseScale || 0} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, noiseScale: Number(e.target.value) })
}}></input>
<span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.noiceScale}</span>
<span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.noiseScale}</span>
</div>
<div className="body-button-container">
</div>

View File

@ -1,31 +0,0 @@
import React, { useMemo } from "react"
import { useAppState } from "../../../001_provider/001_AppStateProvider"
export type SolaEnableRowProps = {
}

/**
 * Settings row exposing the SOLA on/off switch.
 * Renders a two-option select bound to `serverSetting.solaEnabled`
 * (0 = disable, 1 = enable) and writes changes back through
 * `updateServerSettings`.
 */
export const SolaEnableRow = (_props: SolaEnableRowProps) => {
    const appState = useAppState()

    const solaEnableRow = useMemo(() => {
        // Persist the selected value into the server settings object.
        const onSolaEnableChanged = (val: number) => {
            const nextSettings = { ...appState.serverSetting.serverSetting, solaEnabled: val }
            appState.serverSetting.updateServerSettings(nextSettings)
        }

        const currentValue = appState.serverSetting.serverSetting.solaEnabled
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Sola enable</div>
                <div className="body-input-container">
                    <select value={currentValue} onChange={(e) => { onSolaEnableChanged(Number(e.target.value)) }}>
                        <option value="0" >disable</option>
                        <option value="1" >enable</option>
                    </select>
                </div>
            </div>
        )
    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])

    return solaEnableRow
}

View File

@ -74,7 +74,6 @@ export const ServerSettingKey = {
"crossFadeOffsetRate": "crossFadeOffsetRate",
"crossFadeEndRate": "crossFadeEndRate",
"crossFadeOverlapSize": "crossFadeOverlapSize",
"solaEnabled": "solaEnabled",
"framework": "framework",
"onnxExecutionProvider": "onnxExecutionProvider",
@ -84,7 +83,7 @@ export const ServerSettingKey = {
"recordIO": "recordIO",
"tran": "tran",
"noiceScale": "noiceScale",
"noiseScale": "noiseScale",
"predictF0": "predictF0",
"silentThreshold": "silentThreshold",
"extraConvertSize": "extraConvertSize",
@ -111,7 +110,6 @@ export type VoiceChangerServerSetting = {
crossFadeOffsetRate: number,
crossFadeEndRate: number,
crossFadeOverlapSize: CrossFadeOverlapSize,
solaEnabled: number,
framework: Framework
onnxExecutionProvider: OnnxExecutionProvider,
@ -121,7 +119,7 @@ export type VoiceChangerServerSetting = {
recordIO: number // 0:off, 1:on
tran: number // so-vits-svc
noiceScale: number // so-vits-svc
noiseScale: number // so-vits-svc
predictF0: number // so-vits-svc
silentThreshold: number // so-vits-svc
extraConvertSize: number// so-vits-svc
@ -149,248 +147,35 @@ export type ServerInfo = VoiceChangerServerSetting & {
export type ServerInfoSoVitsSVC = ServerInfo & {
speakers: { [key: string]: number }
}
export const DefaultServerSetting_MMVCv15: ServerInfo = {
srcId: 0,
dstId: 101,
gpu: 0,
export const DefaultServerSetting: ServerInfo = {
// VC Common
inputSampleRate: 48000,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
solaEnabled: 0,
framework: Framework.PyTorch,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.dio,
recordIO: 0,
tran: 0,
noiceScale: 0,
predictF0: 0,
silentThreshold: 0,
extraConvertSize: 0,
clusterInferRatio: 0,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 0,
enableEnhancer: 0,
enhancerTune: 0,
inputSampleRate: 48000,
//
status: "ok",
configFile: "",
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: []
}
export const DefaultServerSetting_MMVCv13: ServerInfo = {
srcId: 107,
dstId: 100,
gpu: 0,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
solaEnabled: 0,
framework: Framework.ONNX,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.dio,
recordIO: 0,
tran: 0,
noiceScale: 0,
predictF0: 0,
silentThreshold: 0,
extraConvertSize: 0,
clusterInferRatio: 0,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 0,
enableEnhancer: 0,
enhancerTune: 0,
inputSampleRate: 48000,
//
status: "ok",
configFile: "",
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: []
}
export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
srcId: 0,
dstId: 0,
gpu: 0,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
solaEnabled: 0,
framework: Framework.PyTorch,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.dio,
recordIO: 0,
// tran: 0,
// noiceScale: 0,
// predictF0: 0,
// silentThreshold: 0,
tran: 10,
noiceScale: 0.3,
predictF0: 0,
silentThreshold: 0.00001,
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 0,
enableEnhancer: 0,
enhancerTune: 0,
inputSampleRate: 48000,
//
status: "ok",
configFile: "",
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: []
}
export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
srcId: 0,
dstId: 0,
gpu: 0,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
solaEnabled: 0,
framework: Framework.ONNX,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.dio,
recordIO: 0,
// tran: 0,
// noiceScale: 0,
// predictF0: 0,
// silentThreshold: 0,
tran: 10,
noiceScale: 0.3,
predictF0: 0,
silentThreshold: 0.00001,
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 0,
enableEnhancer: 0,
enhancerTune: 0,
inputSampleRate: 48000,
//
status: "ok",
configFile: "",
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: []
}
export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
srcId: 0,
dstId: 0,
gpu: 0,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
solaEnabled: 0,
framework: Framework.PyTorch,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.dio,
recordIO: 0,
// tran: 0,
// noiceScale: 0,
// predictF0: 0,
// silentThreshold: 0,
tran: 10,
noiceScale: 0.3,
predictF0: 0,
silentThreshold: 0.00001,
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 0,
enableEnhancer: 0,
enhancerTune: 0,
inputSampleRate: 48000,
//
status: "ok",
configFile: "",
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: []
}
export const DefaultServerSetting_DDSP_SVC: ServerInfo = {
// VC Specific
srcId: 0,
dstId: 1,
gpu: 0,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
solaEnabled: 0,
framework: Framework.PyTorch,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.dio,
recordIO: 0,
// tran: 0,
// noiceScale: 0,
// predictF0: 0,
// silentThreshold: 0,
tran: 10,
noiceScale: 0.3,
tran: 0,
noiseScale: 0,
predictF0: 0,
silentThreshold: 0.00001,
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
silentThreshold: 0,
extraConvertSize: 0,
clusterInferRatio: 0,
indexRatio: 0,
rvcQuality: 0,
@ -400,7 +185,6 @@ export const DefaultServerSetting_DDSP_SVC: ServerInfo = {
enableEnhancer: 0,
enhancerTune: 0,
inputSampleRate: 48000,
//
status: "ok",
@ -409,51 +193,31 @@ export const DefaultServerSetting_DDSP_SVC: ServerInfo = {
onnxModelFile: "",
onnxExecutionProviders: []
}
export const DefaultServerSetting_MMVCv15: ServerInfo = {
...DefaultServerSetting, dstId: 101,
}
export const DefaultServerSetting_MMVCv13: ServerInfo = {
...DefaultServerSetting, srcId: 107, dstId: 100,
}
export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
...DefaultServerSetting, tran: 10, noiseScale: 0.3, extraConvertSize: 1024 * 32, clusterInferRatio: 0.1,
}
export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
...DefaultServerSetting, tran: 10, noiseScale: 0.3, extraConvertSize: 1024 * 32, clusterInferRatio: 0.1,
}
export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
...DefaultServerSetting, tran: 10, noiseScale: 0.3, extraConvertSize: 1024 * 32, clusterInferRatio: 0.1,
}
export const DefaultServerSetting_DDSP_SVC: ServerInfo = {
...DefaultServerSetting, dstId: 1, tran: 10, extraConvertSize: 1024 * 32, clusterInferRatio: 0.1, enableEnhancer: 1
}
export const DefaultServerSetting_RVC: ServerInfo = {
srcId: 0,
dstId: 0,
gpu: 0,
crossFadeOffsetRate: 0.1,
crossFadeEndRate: 0.8,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
solaEnabled: 1,
framework: Framework.PyTorch,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.harvest,
recordIO: 0,
// tran: 0,
// noiceScale: 0,
// predictF0: 0,
// silentThreshold: 0,
tran: 10,
noiceScale: 0.3,
predictF0: 0,
silentThreshold: 0.00001,
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 1,
enableEnhancer: 0,
enhancerTune: 0,
inputSampleRate: 48000,
//
status: "ok",
configFile: "",
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: []
...DefaultServerSetting, tran: 10, extraConvertSize: 1024 * 32,
}
///////////////////////
@ -503,41 +267,25 @@ export type WorkletNodeSetting = {
export const DefaultWorkletNodeSetting: WorkletNodeSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 24000,
sendingSampleRate: 48000,
inputChunkNum: 48,
downSamplingMode: "average"
}
export const DefaultWorkletNodeSetting_so_vits_svc_40: WorkletNodeSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 24000,
inputChunkNum: 128,
downSamplingMode: "average"
...DefaultWorkletNodeSetting, inputChunkNum: 128,
}
export const DefaultWorkletNodeSetting_so_vits_svc_40v2: WorkletNodeSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 24000,
inputChunkNum: 128,
downSamplingMode: "average"
...DefaultWorkletNodeSetting, inputChunkNum: 128,
}
export const DefaultWorkletNodeSetting_DDSP_SVC: WorkletNodeSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 48000,
inputChunkNum: 256,
downSamplingMode: "average"
...DefaultWorkletNodeSetting, inputChunkNum: 256,
}
export const DefaultWorkletNodeSetting_RVC: WorkletNodeSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 48000,
inputChunkNum: 256,
downSamplingMode: "average"
...DefaultWorkletNodeSetting, inputChunkNum: 256,
}
///////////////////////

View File

@ -98,7 +98,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
const cachedServerSetting = await getItem(INDEXEDDB_KEY_SERVER)
let initialSetting: ServerInfo
if (cachedServerSetting) {
initialSetting = { ...defaultServerSetting, ...cachedServerSetting as ServerInfo }
initialSetting = { ...defaultServerSetting, ...cachedServerSetting as ServerInfo, inputSampleRate: 48000 }// sample rateは時限措置
console.log("Initial Setting1:", initialSetting)
} else {
initialSetting = { ...defaultServerSetting }

View File

@ -45,7 +45,7 @@ export const useWorkletNodeSetting = (props: UseWorkletNodeSettingProps): Workle
const loadCache = async () => {
const setting = await getItem(INDEXEDDB_KEY_WORKLETNODE) as WorkletNodeSetting
if (setting) {
_setWorkletNodeSetting(setting)
_setWorkletNodeSetting({ ...setting, sendingSampleRate: 48000 }) // sample rateは時限措置
}
}
loadCache()

View File

@ -51,7 +51,7 @@ class DDSP_SVCSettings():
# ↓mutableな物だけ列挙
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "enableEnhancer", "enhancerTune"]
floatData = ["noiceScale", "silentThreshold", "clusterInferRatio"]
floatData = ["silentThreshold", "clusterInferRatio"]
strData = ["framework", "f0Detector"]
@ -229,33 +229,7 @@ class DDSP_SVC:
print("[Voice Changer] No onnx session.")
raise NoModeLoadedException("ONNX")
seg_units = data[0]
# f0 = data[1]
# convertSize = data[2]
# vol = data[3]
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16)
c, f0, uv = [x.numpy() for x in data]
audio1 = self.onnx_session.run(
["audio"],
{
"c": c,
"f0": f0,
"g": np.array([self.settings.dstId]).astype(np.int64),
"uv": np.array([self.settings.dstId]).astype(np.int64),
"predict_f0": np.array([self.settings.dstId]).astype(np.int64),
"noice_scale": np.array([self.settings.dstId]).astype(np.int64),
})[0][0, 0] * self.hps.data.max_wav_value
audio1 = audio1 * vol
result = audio1
return result
raise NoModeLoadedException("ONNX")
def _pyTorch_inference(self, data):

View File

@ -41,7 +41,6 @@ class RVCSettings():
f0Detector: str = "pm" # pm or harvest
tran: int = 20
noiceScale: float = 0.3
predictF0: int = 0 # 0:False, 1:True
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 32
@ -63,7 +62,7 @@ class RVCSettings():
# ↓mutableな物だけ列挙
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "rvcQuality", "modelSamplingRate", "silenceFront"]
floatData = ["noiceScale", "silentThreshold", "indexRatio"]
floatData = ["silentThreshold", "indexRatio"]
strData = ["framework", "f0Detector"]

View File

@ -40,7 +40,7 @@ class SoVitsSvc40Settings():
f0Detector: str = "dio" # dio or harvest
tran: int = 20
noiceScale: float = 0.3
noiseScale: float = 0.3
predictF0: int = 0 # 0:False, 1:True
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 32
@ -57,7 +57,7 @@ class SoVitsSvc40Settings():
# ↓mutableな物だけ列挙
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize"]
floatData = ["noiceScale", "silentThreshold", "clusterInferRatio"]
floatData = ["noiseScale", "silentThreshold", "clusterInferRatio"]
strData = ["framework", "f0Detector"]
@ -308,7 +308,7 @@ class SoVitsSvc40:
"f0": f0.astype(np.float32),
"uv": uv.astype(np.float32),
"g": sid_target.astype(np.int64),
"noice_scale": np.array([self.settings.noiceScale]).astype(np.float32),
"noise_scale": np.array([self.settings.noiseScale]).astype(np.float32),
# "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
@ -342,7 +342,7 @@ class SoVitsSvc40:
# audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=True, noice_scale=0.1)[0][0, 0].data.float()
predict_f0_flag = True if self.settings.predictF0 == 1 else False
audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=predict_f0_flag,
noice_scale=self.settings.noiceScale)
noice_scale=self.settings.noiseScale)
audio1 = audio1[0][0].data.float()
# audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=predict_f0_flag,
# noice_scale=self.settings.noiceScale)[0][0, 0].data.float()

View File

@ -36,7 +36,7 @@ class SoVitsSvc40v2Settings():
f0Detector: str = "dio" # dio or harvest
tran: int = 20
noiceScale: float = 0.3
noiseScale: float = 0.3
predictF0: int = 0 # 0:False, 1:True
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 32
@ -53,7 +53,7 @@ class SoVitsSvc40v2Settings():
# ↓mutableな物だけ列挙
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize"]
floatData = ["noiceScale", "silentThreshold", "clusterInferRatio"]
floatData = ["noiseScale", "silentThreshold", "clusterInferRatio"]
strData = ["framework", "f0Detector"]
@ -300,7 +300,7 @@ class SoVitsSvc40v2:
# audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=True, noice_scale=0.1)[0][0, 0].data.float()
predict_f0_flag = True if self.settings.predictF0 == 1 else False
audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=predict_f0_flag,
noice_scale=self.settings.noiceScale)[0][0, 0].data.float()
noice_scale=self.settings.noiseScale)[0][0, 0].data.float()
audio1 = audio1 * self.hps.data.max_wav_value
audio1 = audio1 * vol

View File

@ -31,13 +31,12 @@ class VoiceChangerSettings():
crossFadeOffsetRate: float = 0.1
crossFadeEndRate: float = 0.9
crossFadeOverlapSize: int = 4096
solaEnabled: int = 1 # 0:off, 1:on
recordIO: int = 0 # 0:off, 1:on
# ↓mutableな物だけ列挙
intData: list[str] = field(
default_factory=lambda: ["inputSampleRate", "crossFadeOverlapSize", "recordIO", "solaEnabled"]
default_factory=lambda: ["inputSampleRate", "crossFadeOverlapSize", "recordIO"]
)
floatData: list[str] = field(
default_factory=lambda: ["crossFadeOffsetRate", "crossFadeEndRate"]