skipdiffusion
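Add a "skip diff" toggle for Diffusion-SVC. When the toggle is active, inference bypasses the dpm-solver diffusion pass and feeds the naive model's mel spectrogram (gt_spec) straight to the vocoder, presumably trading some conversion quality for speed. The flag is exposed as a button in the client UI, carried through the server settings as skipDiffusion (0:off, 1:on), and now defaults to on.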

commit e18138b5d6
parent 6d4c138821
Author: w-okada
Date:   2023-08-06 04:50:42 +09:00
8 changed files with 78 additions and 16 deletions

File diff suppressed because one or more lines are too long


@@ -23,6 +23,26 @@ export const DiffusionSVCSettingArea = (_props: DiffusionSVCSettingAreaProps) =>
         return <></>;
     }
+    const skipDiffusionClass = serverSetting.serverSetting.skipDiffusion == 0 ? "character-area-toggle-button" : "character-area-toggle-button-active";
+    const skipDiffRow = (
+        <div className="character-area-control">
+            <div className="character-area-control-title">Boost</div>
+            <div className="character-area-control-field">
+                <div className="character-area-buttons">
+                    <div
+                        className={skipDiffusionClass}
+                        onClick={() => {
+                            serverSetting.updateServerSettings({ ...serverSetting.serverSetting, skipDiffusion: serverSetting.serverSetting.skipDiffusion == 0 ? 1 : 0 });
+                        }}
+                    >
+                        skip diff
+                    </div>
+                </div>
+            </div>
+        </div>
+    );
     const skipValues = getDivisors(serverSetting.serverSetting.kStep);
     skipValues.pop();
@@ -82,6 +102,7 @@ export const DiffusionSVCSettingArea = (_props: DiffusionSVCSettingAreaProps) =>
     );
     return (
         <>
+            {skipDiffRow}
            {kStepRow}
            {speedUpRow}
         </>


@@ -1338,6 +1338,7 @@ body {
     .character-area-control {
         display: flex;
         gap: 3px;
+        align-items: center;
         .character-area-control-buttons {
             display: flex;
             flex-direction: row;
@@ -1405,6 +1406,35 @@ body {
         .character-area-button:hover {
             border: solid 2px #faa;
         }
+        .character-area-toggle-button {
+            border: solid 2px #999;
+            color: white;
+            background: #666;
+            cursor: pointer;
+            font-size: 0.8rem;
+            border-radius: 5px;
+            height: 1.2rem;
+            padding-left: 2px;
+            padding-right: 2px;
+        }
+        .character-area-toggle-button:hover {
+            border: solid 2px #faa;
+        }
+        .character-area-toggle-button-active {
+            border: solid 2px #999;
+            color: white;
+            background: #844;
+            cursor: pointer;
+            font-size: 0.8rem;
+            border-radius: 5px;
+            height: 1.2rem;
+            padding-left: 2px;
+            padding-right: 2px;
+        }
     }
 }
}


@@ -124,6 +124,7 @@ export const ServerSettingKey = {
     "threshold": "threshold",
     "speedUp": "speedUp",
+    "skipDiffusion": "skipDiffusion",
     "inputSampleRate": "inputSampleRate",
     "enableDirectML": "enableDirectML",
@@ -186,7 +187,7 @@ export type VoiceChangerServerSetting = {
     threshold: number // DDSP-SVC
     speedUp: number // Diffusion-SVC
+    skipDiffusion: number // Diffusion-SVC 0:off, 1:on
     inputSampleRate: InputSampleRate
     enableDirectML: number
@@ -403,6 +404,7 @@ export const DefaultServerSetting: ServerInfo = {
     threshold: -45,
     speedUp: 10,
+    skipDiffusion: 1,
     enableDirectML: 0,
     //


@@ -180,7 +180,8 @@ class DiffusionSVC(VoiceChangerModel):
                 silenceFrontSec,
                 embOutputLayer,
                 useFinalProj,
-                protect
+                protect,
+                skip_diffusion=self.settings.skipDiffusion,
             )
             result = audio_out.detach().cpu().numpy()
             return result


@@ -13,7 +13,7 @@ class DiffusionSVCSettings:
     kStep: int = 20
     speedUp: int = 10
-    skipDiffusion: int = 0  # 0:off, 1:on
+    skipDiffusion: int = 1  # 0:off, 1:on
     silenceFront: int = 1  # 0:off, 1:on
     modelSamplingRate: int = 44100
@@ -30,6 +30,7 @@ class DiffusionSVCSettings:
         "kStep",
         "speedUp",
         "silenceFront",
+        "skipDiffusion",
     ]
     floatData = ["silentThreshold"]
     strData = ["f0Detector"]


@@ -112,25 +112,27 @@ class DiffusionSVCInferencer(Inferencer):
         k_step: int,
         infer_speedup: int,
         silence_front: float,
+        skip_diffusion: bool = True,
     ) -> torch.Tensor:
         with Timer("pre-process", False) as t:
             gt_spec = self.naive_model_call(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, spk_emb=None)
             # gt_spec = self.vocoder.extract(audio_t, 16000)
             # gt_spec = torch.cat((gt_spec, gt_spec[:, -1:, :]), 1)
         # print("[ ----Timer::1: ]", t.secs)
-        with Timer("pre-process", False) as t:
-            out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None)
+        with Timer("pre-process", True) as t:
+            if skip_diffusion == 0:
+                out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None)
+                gt_spec = out_mel
-        print("[ ----Timer::2: ]", t.secs)
+        # print("[ ----Timer::2: ]", t.secs)
         with Timer("pre-process", False) as t:  # NOQA
             if self.vocoder_onnx is None:
                 start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size)
-                out_wav = self.mel2wav(out_mel, pitch, start_frame=start_frame)
+                out_wav = self.mel2wav(gt_spec, pitch, start_frame=start_frame)
                 out_wav *= mask
             else:
-                out_wav = self.vocoder_onnx.infer(out_mel, pitch, silence_front, mask)
+                out_wav = self.vocoder_onnx.infer(gt_spec, pitch, silence_front, mask)
         # print("[ ----Timer::3: ]", t.secs)
         return out_wav.squeeze()
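For orientation, the control flow this hunk produces, condensed into a standalone sketch (signatures simplified, Timer bookkeeping omitted; not a drop-in excerpt from the repo): the naive model always produces gt_spec, and the dpm-solver refinement runs only when skip_diffusion is 0.

# Condensed sketch of the new inference path.
def mel_pipeline(inferencer, feats, pitch, volume, sid, k_step, infer_speedup, skip_diffusion=1):
    # Always run the cheap naive model to get a first mel estimate.
    gt_spec = inferencer.naive_model_call(feats, pitch, volume, spk_id=sid,
                                          spk_mix_dict=None, aug_shift=0, spk_emb=None)
    if skip_diffusion == 0:
        # Only when skipping is off: refine the mel with the diffusion sampler.
        gt_spec = inferencer(feats, pitch, volume, spk_id=sid, spk_mix_dict=None,
                             aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup,
                             method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None)
    return gt_spec  # handed to mel2wav or the ONNX vocoder either way

Note that the comparison is `== 0` against the integer skipDiffusion flag from the settings, even though the parameter is annotated bool; passing True therefore skips the diffusion pass.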


@@ -21,11 +21,16 @@ class Inferencer(Protocol):
     def infer(
         self,
-        audio_t: torch.Tensor,
         feats: torch.Tensor,
-        pitch_length: torch.Tensor,
-        pitch: torch.Tensor | None,
-        pitchf: torch.Tensor | None,
+        pitch: torch.Tensor,
+        volume: torch.Tensor,
+        mask: torch.Tensor,
         sid: torch.Tensor,
+        k_step: int,
+        infer_speedup: int,
+        silence_front: float,
+        skip_diffusion: bool = True,
     ) -> torch.Tensor:
         ...