WIP:diffusion svc config

This commit is contained in:
w-okada 2023-07-15 18:35:11 +09:00
parent 01291dc4e3
commit 2c4503ade8
8 changed files with 289 additions and 149 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,81 @@
import React, { useMemo } from "react";
import { useAppState } from "../../../001_provider/001_AppStateProvider";
import { DiffusionSVCModelSlot } from "@dannadori/voice-changer-client-js";
/** Props accepted by the DiffusionSVCSettingArea component; currently an empty object type (the component does not read its props). */
export type DiffusionSVCSettingAreaProps = {};
/**
 * Settings panel for the Diffusion-SVC voice changer.
 *
 * Renders two range sliders bound to the server settings:
 *  - "k-step": 0 .. `kStepMax` of the currently selected model slot
 *  - "speedup": 0 .. the current `kStep`
 *
 * Renders an empty fragment when no model slot is selected or when the
 * selected slot's `voiceChangerType` is not "Diffusion-SVC".
 *
 * @param _props - unused; the component takes no props.
 * @returns the memoized settings rows (or an empty fragment).
 */
export const DiffusionSVCSettingArea = (_props: DiffusionSVCSettingAreaProps) => {
    const { serverSetting } = useAppState();

    // Resolve the currently selected model slot (undefined when nothing is selected).
    const selected = useMemo(() => {
        // `== undefined` is deliberate: it matches both null and undefined.
        if (serverSetting.serverSetting.modelSlotIndex == undefined) {
            return;
        }
        return serverSetting.serverSetting.modelSlots[serverSetting.serverSetting.modelSlotIndex];
    }, [serverSetting.serverSetting.modelSlotIndex, serverSetting.serverSetting.modelSlots]);

    const settingArea = useMemo(() => {
        // This panel only applies to Diffusion-SVC slots.
        if (!selected || selected.voiceChangerType !== "Diffusion-SVC") {
            return <></>;
        }

        const { kStepMax } = selected as DiffusionSVCModelSlot;
        const { kStep, speedUp } = serverSetting.serverSetting;

        const onKStepChanged = (e: React.ChangeEvent<HTMLInputElement>) => {
            const newKStep = Number(e.target.value);
            // The speed-up slider is bounded by kStep (see `max` below). When kStep is
            // lowered beneath the stored speedUp, clamp speedUp in the same update so
            // the stored value never falls outside the range the slider can display.
            serverSetting.updateServerSettings({
                ...serverSetting.serverSetting,
                kStep: newKStep,
                speedUp: Math.min(speedUp, newKStep),
            });
        };

        const onSpeedUpChanged = (e: React.ChangeEvent<HTMLInputElement>) => {
            serverSetting.updateServerSettings({ ...serverSetting.serverSetting, speedUp: Number(e.target.value) });
        };

        const kStepRow = (
            <div className="character-area-control">
                <div className="character-area-control-title">k-step:</div>
                <div className="character-area-control-field">
                    <div className="character-area-slider-control">
                        <span className="character-area-slider-control-kind"></span>
                        <span className="character-area-slider-control-slider">
                            <input type="range" min="0" max={kStepMax} step="1" value={kStep} onChange={onKStepChanged}></input>
                        </span>
                        <span className="character-area-slider-control-val">{kStep}</span>
                    </div>
                </div>
            </div>
        );

        const speedUpRow = (
            <div className="character-area-control">
                <div className="character-area-control-title">speedup</div>
                <div className="character-area-control-field">
                    <div className="character-area-slider-control">
                        <span className="character-area-slider-control-kind"></span>
                        <span className="character-area-slider-control-slider">
                            {/* Upper bound follows the current k-step value. */}
                            <input type="range" min="0" max={kStep} step="1" value={speedUp} onChange={onSpeedUpChanged}></input>
                        </span>
                        <span className="character-area-slider-control-val">{speedUp}</span>
                    </div>
                </div>
            </div>
        );

        return (
            <>
                {kStepRow}
                {speedUpRow}
            </>
        );
    }, [serverSetting.serverSetting, serverSetting.updateServerSettings, selected]);

    return settingArea;
};

View File

@ -1,88 +1,95 @@
import React, { useEffect, useMemo, useState } from "react"
import { useAppState } from "../../../001_provider/001_AppStateProvider"
import { useGuiState } from "../001_GuiStateProvider"
import { OnnxExporterInfo } from "@dannadori/voice-changer-client-js"
import { useMessageBuilder } from "../../../hooks/useMessageBuilder"
import { TuningArea } from "./101-1_TuningArea"
import { IndexArea } from "./101-2_IndexArea"
import { SpeakerArea } from "./101-3_SpeakerArea"
import { F0FactorArea } from "./101-4_F0FactorArea"
import { SoVitsSVC40SettingArea } from "./101-5_so-vits-svc40SettingArea"
import { DDSPSVC30SettingArea } from "./101-6_ddsp-svc30SettingArea"
export type CharacterAreaProps = {
}
import React, { useEffect, useMemo, useState } from "react";
import { useAppState } from "../../../001_provider/001_AppStateProvider";
import { useGuiState } from "../001_GuiStateProvider";
import { OnnxExporterInfo } from "@dannadori/voice-changer-client-js";
import { useMessageBuilder } from "../../../hooks/useMessageBuilder";
import { TuningArea } from "./101-1_TuningArea";
import { IndexArea } from "./101-2_IndexArea";
import { SpeakerArea } from "./101-3_SpeakerArea";
import { F0FactorArea } from "./101-4_F0FactorArea";
import { SoVitsSVC40SettingArea } from "./101-5_so-vits-svc40SettingArea";
import { DDSPSVC30SettingArea } from "./101-6_ddsp-svc30SettingArea";
import { DiffusionSVCSettingArea } from "./101-7_diffusion-svcSettingArea";
export type CharacterAreaProps = {};
export const CharacterArea = (_props: CharacterAreaProps) => {
const { serverSetting, initializedRef, volume, bufferingTime, performance, setting, setVoiceChangerClientSetting, start, stop } = useAppState()
const guiState = useGuiState()
const messageBuilderState = useMessageBuilder()
const { serverSetting, initializedRef, volume, bufferingTime, performance, setting, setVoiceChangerClientSetting, start, stop } = useAppState();
const guiState = useGuiState();
const messageBuilderState = useMessageBuilder();
useMemo(() => {
messageBuilderState.setMessage(__filename, "terms_of_use", { "ja": "利用規約", "en": "terms of use" })
messageBuilderState.setMessage(__filename, "export_to_onnx", { "ja": "onnx出力", "en": "export to onnx" })
messageBuilderState.setMessage(__filename, "save_default", { "ja": "設定保存", "en": "save setting" })
messageBuilderState.setMessage(__filename, "alert_onnx", { "ja": "ボイチェン中はonnx出力できません", "en": "cannot export onnx when voice conversion is enabled" })
}, [])
messageBuilderState.setMessage(__filename, "terms_of_use", { ja: "利用規約", en: "terms of use" });
messageBuilderState.setMessage(__filename, "export_to_onnx", { ja: "onnx出力", en: "export to onnx" });
messageBuilderState.setMessage(__filename, "save_default", { ja: "設定保存", en: "save setting" });
messageBuilderState.setMessage(__filename, "alert_onnx", { ja: "ボイチェン中はonnx出力できません", en: "cannot export onnx when voice conversion is enabled" });
}, []);
const selected = useMemo(() => {
if (serverSetting.serverSetting.modelSlotIndex == undefined) {
return
return;
}
return serverSetting.serverSetting.modelSlots[serverSetting.serverSetting.modelSlotIndex]
}, [serverSetting.serverSetting.modelSlotIndex, serverSetting.serverSetting.modelSlots])
return serverSetting.serverSetting.modelSlots[serverSetting.serverSetting.modelSlotIndex];
}, [serverSetting.serverSetting.modelSlotIndex, serverSetting.serverSetting.modelSlots]);
useEffect(() => {
const vol = document.getElementById("status-vol") as HTMLSpanElement
const buf = document.getElementById("status-buf") as HTMLSpanElement
const res = document.getElementById("status-res") as HTMLSpanElement
const vol = document.getElementById("status-vol") as HTMLSpanElement;
const buf = document.getElementById("status-buf") as HTMLSpanElement;
const res = document.getElementById("status-res") as HTMLSpanElement;
if (!vol || !buf || !res) {
return
return;
}
vol.innerText = volume.toFixed(4)
buf.innerText = bufferingTime.toString()
res.innerText = performance.responseTime.toString()
}, [volume, bufferingTime, performance])
vol.innerText = volume.toFixed(4);
buf.innerText = bufferingTime.toString();
res.innerText = performance.responseTime.toString();
}, [volume, bufferingTime, performance]);
const portrait = useMemo(() => {
if (!selected) {
return <></>
return <></>;
}
const icon = selected.iconFile.length > 0 ? selected.iconFile : "./assets/icons/human.png"
const selectedTermOfUseUrlLink = selected.termsOfUseUrl ? <a href={selected.termsOfUseUrl} target="_blank" rel="noopener noreferrer" className="portrait-area-terms-of-use-link">[{messageBuilderState.getMessage(__filename, "terms_of_use")}]</a> : <></>
const icon = selected.iconFile.length > 0 ? selected.iconFile : "./assets/icons/human.png";
const selectedTermOfUseUrlLink = selected.termsOfUseUrl ? (
<a href={selected.termsOfUseUrl} target="_blank" rel="noopener noreferrer" className="portrait-area-terms-of-use-link">
[{messageBuilderState.getMessage(__filename, "terms_of_use")}]
</a>
) : (
<></>
);
return (
<div className="portrait-area">
<div className="portrait-container">
<img className="portrait" src={icon} alt={selected.name} />
<div className="portrait-area-status">
<p><span className="portrait-area-status-vctype">{selected.voiceChangerType}</span></p>
<p>vol: <span id="status-vol">0</span></p>
<p>buf: <span id="status-buf">0</span> ms</p>
<p>res: <span id="status-res">0</span> ms</p>
</div>
<div className="portrait-area-terms-of-use">
{selectedTermOfUseUrlLink}
<p>
<span className="portrait-area-status-vctype">{selected.voiceChangerType}</span>
</p>
<p>
vol: <span id="status-vol">0</span>
</p>
<p>
buf: <span id="status-buf">0</span> ms
</p>
<p>
res: <span id="status-res">0</span> ms
</p>
</div>
<div className="portrait-area-terms-of-use">{selectedTermOfUseUrlLink}</div>
</div>
</div>
)
}, [selected])
);
}, [selected]);
const [startWithAudioContextCreate, setStartWithAudioContextCreate] = useState<boolean>(false)
const [startWithAudioContextCreate, setStartWithAudioContextCreate] = useState<boolean>(false);
useEffect(() => {
if (!startWithAudioContextCreate) {
return
return;
}
guiState.setIsConverting(true)
start()
}, [startWithAudioContextCreate])
guiState.setIsConverting(true);
start();
}, [startWithAudioContextCreate]);
const startControl = useMemo(() => {
const onStartClicked = async () => {
@ -90,82 +97,86 @@ export const CharacterArea = (_props: CharacterAreaProps) => {
if (!initializedRef.current) {
while (true) {
await new Promise<void>((resolve) => {
setTimeout(resolve, 500)
})
setTimeout(resolve, 500);
});
if (initializedRef.current) {
break
break;
}
}
setStartWithAudioContextCreate(true)
setStartWithAudioContextCreate(true);
} else {
guiState.setIsConverting(true)
await start()
guiState.setIsConverting(true);
await start();
}
} else {
serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverAudioStated: 1 })
guiState.setIsConverting(true)
serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverAudioStated: 1 });
guiState.setIsConverting(true);
}
}
};
const onStopClicked = async () => {
if (serverSetting.serverSetting.enableServerAudio == 0) {
guiState.setIsConverting(false)
await stop()
guiState.setIsConverting(false);
await stop();
} else {
guiState.setIsConverting(false)
serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverAudioStated: 0 })
guiState.setIsConverting(false);
serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverAudioStated: 0 });
}
}
const startClassName = guiState.isConverting ? "character-area-control-button-active" : "character-area-control-button-stanby"
const stopClassName = guiState.isConverting ? "character-area-control-button-stanby" : "character-area-control-button-active"
};
const startClassName = guiState.isConverting ? "character-area-control-button-active" : "character-area-control-button-stanby";
const stopClassName = guiState.isConverting ? "character-area-control-button-stanby" : "character-area-control-button-active";
return (
<div className="character-area-control">
<div className="character-area-control-buttons">
<div onClick={onStartClicked} className={startClassName}>start</div>
<div onClick={onStopClicked} className={stopClassName}>stop</div>
<div onClick={onStartClicked} className={startClassName}>
start
</div>
<div onClick={onStopClicked} className={stopClassName}>
stop
</div>
</div>
</div>
)
}, [
guiState.isConverting,
start,
stop,
serverSetting.serverSetting,
serverSetting.updateServerSettings
])
);
}, [guiState.isConverting, start, stop, serverSetting.serverSetting, serverSetting.updateServerSettings]);
const gainControl = useMemo(() => {
const currentInputGain = serverSetting.serverSetting.enableServerAudio == 0 ? setting.voiceChangerClientSetting.inputGain : serverSetting.serverSetting.serverInputAudioGain
const inputValueUpdatedAction = serverSetting.serverSetting.enableServerAudio == 0 ?
async (val: number) => {
await setVoiceChangerClientSetting({ ...setting.voiceChangerClientSetting, inputGain: val })
} :
async (val: number) => {
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverInputAudioGain: val })
}
const currentOutputGain = serverSetting.serverSetting.enableServerAudio == 0 ? setting.voiceChangerClientSetting.outputGain : serverSetting.serverSetting.serverOutputAudioGain
const outputValueUpdatedAction = serverSetting.serverSetting.enableServerAudio == 0 ?
async (val: number) => {
await setVoiceChangerClientSetting({ ...setting.voiceChangerClientSetting, outputGain: val })
} :
async (val: number) => {
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverOutputAudioGain: val })
}
const currentInputGain = serverSetting.serverSetting.enableServerAudio == 0 ? setting.voiceChangerClientSetting.inputGain : serverSetting.serverSetting.serverInputAudioGain;
const inputValueUpdatedAction =
serverSetting.serverSetting.enableServerAudio == 0
? async (val: number) => {
await setVoiceChangerClientSetting({ ...setting.voiceChangerClientSetting, inputGain: val });
}
: async (val: number) => {
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverInputAudioGain: val });
};
const currentOutputGain = serverSetting.serverSetting.enableServerAudio == 0 ? setting.voiceChangerClientSetting.outputGain : serverSetting.serverSetting.serverOutputAudioGain;
const outputValueUpdatedAction =
serverSetting.serverSetting.enableServerAudio == 0
? async (val: number) => {
await setVoiceChangerClientSetting({ ...setting.voiceChangerClientSetting, outputGain: val });
}
: async (val: number) => {
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, serverOutputAudioGain: val });
};
return (
<div className="character-area-control">
<div className="character-area-control-title">
GAIN:
</div>
<div className="character-area-control-title">GAIN:</div>
<div className="character-area-control-field">
<div className="character-area-slider-control">
<span className="character-area-slider-control-kind">in</span>
<span className="character-area-slider-control-slider">
<input type="range" min="0.1" max="10.0" step="0.1" value={currentInputGain} onChange={(e) => {
inputValueUpdatedAction(Number(e.target.value))
}}></input>
<input
type="range"
min="0.1"
max="10.0"
step="0.1"
value={currentInputGain}
onChange={(e) => {
inputValueUpdatedAction(Number(e.target.value));
}}
></input>
</span>
<span className="character-area-slider-control-val">{currentInputGain}</span>
</div>
@ -173,64 +184,72 @@ export const CharacterArea = (_props: CharacterAreaProps) => {
<div className="character-area-slider-control">
<span className="character-area-slider-control-kind">out</span>
<span className="character-area-slider-control-slider">
<input type="range" min="0.1" max="10.0" step="0.1" value={currentOutputGain} onChange={(e) => {
outputValueUpdatedAction(Number(e.target.value))
}}></input>
<input
type="range"
min="0.1"
max="10.0"
step="0.1"
value={currentOutputGain}
onChange={(e) => {
outputValueUpdatedAction(Number(e.target.value));
}}
></input>
</span>
<span className="character-area-slider-control-val">{currentOutputGain}</span>
</div>
</div>
</div>
)
}, [serverSetting.serverSetting, setting, setVoiceChangerClientSetting, serverSetting.updateServerSettings])
);
}, [serverSetting.serverSetting, setting, setVoiceChangerClientSetting, serverSetting.updateServerSettings]);
const modelSlotControl = useMemo(() => {
if (!selected) {
return <></>
return <></>;
}
const onUpdateDefaultClicked = async () => {
await serverSetting.updateModelDefault()
}
await serverSetting.updateModelDefault();
};
const onnxExportButtonAction = async () => {
if (guiState.isConverting) {
alert(messageBuilderState.getMessage(__filename, "alert_onnx"))
return
alert(messageBuilderState.getMessage(__filename, "alert_onnx"));
return;
}
document.getElementById("dialog")?.classList.add("dialog-container-show")
guiState.stateControls.showWaitingCheckbox.updateState(true)
const res = await serverSetting.getOnnx() as OnnxExporterInfo
const a = document.createElement("a")
a.href = res.path
document.getElementById("dialog")?.classList.add("dialog-container-show");
guiState.stateControls.showWaitingCheckbox.updateState(true);
const res = (await serverSetting.getOnnx()) as OnnxExporterInfo;
const a = document.createElement("a");
a.href = res.path;
a.download = res.filename;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
guiState.stateControls.showWaitingCheckbox.updateState(false)
guiState.stateControls.showWaitingCheckbox.updateState(false);
};
}
const exportOnnx = selected.voiceChangerType == "RVC" && selected.modelFile.endsWith("pth") ? (
<div className="character-area-button" onClick={onnxExportButtonAction}>{messageBuilderState.getMessage(__filename, "export_to_onnx")}</div>
) : <></>
const exportOnnx =
selected.voiceChangerType == "RVC" && selected.modelFile.endsWith("pth") ? (
<div className="character-area-button" onClick={onnxExportButtonAction}>
{messageBuilderState.getMessage(__filename, "export_to_onnx")}
</div>
) : (
<></>
);
return (
<div className="character-area-control">
<div className="character-area-control-title">
</div>
<div className="character-area-control-title"></div>
<div className="character-area-control-field">
<div className="character-area-buttons">
<div className="character-area-button" onClick={onUpdateDefaultClicked}>{messageBuilderState.getMessage(__filename, "save_default")}</div>
<div className="character-area-button" onClick={onUpdateDefaultClicked}>
{messageBuilderState.getMessage(__filename, "save_default")}
</div>
{exportOnnx}
</div>
</div>
</div>
)
}, [selected, serverSetting.getOnnx, serverSetting.updateModelDefault])
);
}, [selected, serverSetting.getOnnx, serverSetting.updateModelDefault]);
const characterArea = useMemo(() => {
return (
@ -245,11 +264,12 @@ export const CharacterArea = (_props: CharacterAreaProps) => {
<F0FactorArea />
<SoVitsSVC40SettingArea />
<DDSPSVC30SettingArea />
<DiffusionSVCSettingArea />
{modelSlotControl}
</div>
</div>
)
}, [portrait, startControl, gainControl, modelSlotControl])
);
}, [portrait, startControl, gainControl, modelSlotControl]);
return characterArea
}
return characterArea;
};

View File

@ -122,6 +122,8 @@ export const ServerSettingKey = {
"kStep": "kStep",
"threshold": "threshold",
"speedUp": "speedUp",
"inputSampleRate": "inputSampleRate",
"enableDirectML": "enableDirectML",
} as const
@ -181,6 +183,9 @@ export type VoiceChangerServerSetting = {
kStep: number// DDSP-SVC
threshold: number// DDSP-SVC
speedUp: number // Diffusion-SVC
inputSampleRate: InputSampleRate
enableDirectML: number
}
@ -260,7 +265,21 @@ export type DDSPSVCModelSlot = ModelSlot & {
speakers: { [key: number]: string }
}
export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot
/**
 * Model-slot shape for a Diffusion-SVC model: the common `ModelSlot` fields
 * intersected with the Diffusion-SVC specific fields below.
 * NOTE(review): field meanings marked "presumably" are inferred from names only — confirm against the server.
 */
export type DiffusionSVCModelSlot = ModelSlot & {
// presumably the model file name/path for this slot — TODO confirm
modelFile: string
// presumably the default target speaker id — TODO confirm
dstId: number
// The server code in this commit reads `slotInfo.samplingRate` as the divisor that converts extraConvertSize (samples) to seconds.
samplingRate: number
// presumably the slot's default pitch shift — TODO confirm
defaultTune: number
// presumably the slot's initial k-step value — TODO confirm
defaultKstep : number
// presumably the slot's initial speed-up value — TODO confirm
defaultSpeedup: number
// Upper bound for k-step; the Diffusion-SVC settings panel uses it as the k-step slider's `max`.
kStepMax: number
// Map from numeric key to string; presumably speaker id -> speaker name — TODO confirm
speakers: { [key: number]: string }
}
export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot | DiffusionSVCModelSlot
type ServerAudioDevice = {
kind: "audioinput" | "audiooutput",
@ -362,6 +381,8 @@ export const DefaultServerSetting: ServerInfo = {
kStep: 120,
threshold: -45,
speedUp: 10,
enableDirectML: 0,
//
status: "ok",

View File

@ -32,7 +32,7 @@ class DiffusionSVC(VoiceChangerModel):
self.feature_buffer: FeatureInOut | None = None
self.prevVol = 0.0
self.slotInfo = slotInfo
def initialize(self):
print("[Voice Changer] [DiffusionSVC] Initializing... ")
@ -138,7 +138,7 @@ class DiffusionSVC(VoiceChangerModel):
if self.pipeline is None:
return np.zeros(convertSize).astype(np.int16) * np.sqrt(vol)
# device = self.pipeline.device
# audio = torch.from_numpy(audio).to(device=device, dtype=torch.float32)
# audio = self.resampler16K(audio)
@ -146,6 +146,8 @@ class DiffusionSVC(VoiceChangerModel):
f0_up_key = self.settings.tran
protect = 0
kStep = self.settings.kStep
speedUp = self.settings.speedUp
embOutputLayer = 12
useFinalProj = False
silenceFrontSec = self.settings.extraConvertSize / self.slotInfo.samplingRate if self.settings.silenceFront else 0. # extaraConvertSize(既にモデルのサンプリングレートにリサンプリング済み)の秒数。モデルのサンプリングレートで処理(★1)。
@ -157,6 +159,8 @@ class DiffusionSVC(VoiceChangerModel):
pitchf,
feature,
f0_up_key,
kStep,
speedUp,
silenceFrontSec,
embOutputLayer,
useFinalProj,
@ -172,7 +176,7 @@ class DiffusionSVC(VoiceChangerModel):
# raise e
return
def __del__(self):
del self.pipeline

View File

@ -11,8 +11,8 @@ class DiffusionSVCSettings:
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 4
kstep: int = 20
speedup: int = 10
kStep: int = 20
speedUp: int = 10
silenceFront: int = 1 # 0:off, 1:on
modelSamplingRate: int = 44100
@ -26,7 +26,8 @@ class DiffusionSVCSettings:
"dstId",
"tran",
"extraConvertSize",
"kstep",
"kStep",
"speedUp",
"silenceFront",
]
floatData = ["silentThreshold"]

View File

@ -105,8 +105,8 @@ class DiffusionSVCInferencer(Inferencer):
volume: torch.Tensor,
mask: torch.Tensor,
sid: torch.Tensor,
infer_speedup: int,
k_step: int,
infer_speedup: int,
silence_front: float,
) -> torch.Tensor:
gt_spec = self.naive_model_call(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, spk_emb=None)

View File

@ -105,6 +105,8 @@ class Pipeline(object):
pitchf, # np.array [m]
feature, # np.array [m, feat]
f0_up_key,
k_step,
infer_speedup,
silence_front,
embOutputLayer,
useFinalProj,
@ -164,8 +166,8 @@ class Pipeline(object):
volume,
mask,
sid,
infer_speedup=10,
k_step=20,
k_step,
infer_speedup,
silence_front=silence_front
).to(dtype=torch.float32),
-1.0,