mirror of https://github.com/w-okada/voice-changer.git (synced 2025-03-13 19:34:02 +03:00)

skipdiffusion

parent 6d4c138821
commit e18138b5d6
client/demo/dist/index.js (vendored)
File diff suppressed because one or more lines are too long
@@ -23,6 +23,26 @@ export const DiffusionSVCSettingArea = (_props: DiffusionSVCSettingAreaProps) =>
         return <></>;
     }
 
+    const skipDiffusionClass = serverSetting.serverSetting.skipDiffusion == 0 ? "character-area-toggle-button" : "character-area-toggle-button-active";
+
+    const skipDiffRow = (
+        <div className="character-area-control">
+            <div className="character-area-control-title">Boost</div>
+            <div className="character-area-control-field">
+                <div className="character-area-buttons">
+                    <div
+                        className={skipDiffusionClass}
+                        onClick={() => {
+                            serverSetting.updateServerSettings({ ...serverSetting.serverSetting, skipDiffusion: serverSetting.serverSetting.skipDiffusion == 0 ? 1 : 0 });
+                        }}
+                    >
+                        skip diff
+                    </div>
+                </div>
+            </div>
+        </div>
+    );
+
     const skipValues = getDivisors(serverSetting.serverSetting.kStep);
     skipValues.pop();
 
@@ -82,6 +102,7 @@ export const DiffusionSVCSettingArea = (_props: DiffusionSVCSettingAreaProps) =>
     );
     return (
         <>
+            {skipDiffRow}
             {kStepRow}
             {speedUpRow}
         </>
@@ -1338,6 +1338,7 @@ body {
     .character-area-control {
         display: flex;
         gap: 3px;
+        align-items: center;
         .character-area-control-buttons {
             display: flex;
             flex-direction: row;
@@ -1405,6 +1406,35 @@ body {
     .character-area-button:hover {
         border: solid 2px #faa;
     }
+    .character-area-toggle-button {
+        border: solid 2px #999;
+        color: white;
+        background: #666;
+
+        cursor: pointer;
+
+        font-size: 0.8rem;
+        border-radius: 5px;
+        height: 1.2rem;
+        padding-left: 2px;
+        padding-right: 2px;
+    }
+    .character-area-toggle-button:hover {
+        border: solid 2px #faa;
+    }
+    .character-area-toggle-button-active {
+        border: solid 2px #999;
+        color: white;
+        background: #844;
+
+        cursor: pointer;
+
+        font-size: 0.8rem;
+        border-radius: 5px;
+        height: 1.2rem;
+        padding-left: 2px;
+        padding-right: 2px;
+    }
 }
 }
 }
@@ -124,6 +124,7 @@ export const ServerSettingKey = {
     "threshold": "threshold",
 
     "speedUp": "speedUp",
+    "skipDiffusion": "skipDiffusion",
 
     "inputSampleRate": "inputSampleRate",
     "enableDirectML": "enableDirectML",
@@ -186,7 +187,7 @@ export type VoiceChangerServerSetting = {
     threshold: number// DDSP-SVC
 
     speedUp: number // Diffusion-SVC
+    skipDiffusion: number // Diffusion-SVC 0:off, 1:on
 
     inputSampleRate: InputSampleRate
     enableDirectML: number
@@ -403,6 +404,7 @@ export const DefaultServerSetting: ServerInfo = {
     threshold: -45,
 
     speedUp: 10,
+    skipDiffusion: 1,
 
     enableDirectML: 0,
     //
@@ -180,7 +180,8 @@ class DiffusionSVC(VoiceChangerModel):
             silenceFrontSec,
             embOutputLayer,
             useFinalProj,
-            protect
+            protect,
+            skip_diffusion=self.settings.skipDiffusion,
         )
         result = audio_out.detach().cpu().numpy()
         return result
@@ -13,7 +13,7 @@ class DiffusionSVCSettings:
 
     kStep: int = 20
     speedUp: int = 10
-    skipDiffusion: int = 0 # 0:off, 1:on
+    skipDiffusion: int = 1 # 0:off, 1:on
 
     silenceFront: int = 1 # 0:off, 1:on
     modelSamplingRate: int = 44100
@@ -30,6 +30,7 @@ class DiffusionSVCSettings:
         "kStep",
         "speedUp",
         "silenceFront",
+        "skipDiffusion",
     ]
     floatData = ["silentThreshold"]
     strData = ["f0Detector"]
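
Note: these intData / floatData / strData lists appear to drive how incoming setting values are coerced before being assigned, which is why "skipDiffusion" has to be registered in intData for the client's 0/1 toggle to take effect. A minimal sketch of that pattern, assuming the usual coercion behavior; the Settings class and update_setting helper below are illustrative only and not part of this commit:

    from dataclasses import dataclass

    @dataclass
    class Settings:
        kStep: int = 20
        speedUp: int = 10
        skipDiffusion: int = 1  # 0:off, 1:on
        silenceFront: int = 1   # 0:off, 1:on
        intData = ["kStep", "speedUp", "silenceFront", "skipDiffusion"]
        floatData = ["silentThreshold"]
        strData = ["f0Detector"]

    def update_setting(settings: Settings, key: str, val) -> bool:
        # hypothetical helper: coerce the incoming value based on which list the key is in
        if key in settings.intData:
            setattr(settings, key, int(val))
        elif key in settings.floatData:
            setattr(settings, key, float(val))
        elif key in settings.strData:
            setattr(settings, key, str(val))
        else:
            return False
        return True

    settings = Settings()
    update_setting(settings, "skipDiffusion", "0")  # e.g. the client toggling "skip diff" off
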
@@ -112,25 +112,27 @@ class DiffusionSVCInferencer(Inferencer):
         k_step: int,
         infer_speedup: int,
         silence_front: float,
+        skip_diffusion: bool = True,
     ) -> torch.Tensor:
         with Timer("pre-process", False) as t:
             gt_spec = self.naive_model_call(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, spk_emb=None)
             # gt_spec = self.vocoder.extract(audio_t, 16000)
             # gt_spec = torch.cat((gt_spec, gt_spec[:, -1:, :]), 1)
 
         # print("[ ----Timer::1: ]", t.secs)
 
-        with Timer("pre-process", False) as t:
-            out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None)
+        with Timer("pre-process", True) as t:
+            if skip_diffusion == 0:
+                out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None)
+                gt_spec = out_mel
+        print("[ ----Timer::2: ]", t.secs)
 
         # print("[ ----Timer::2: ]", t.secs)
         with Timer("pre-process", False) as t:  # NOQA
             if self.vocoder_onnx is None:
                 start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size)
-                out_wav = self.mel2wav(out_mel, pitch, start_frame=start_frame)
+                out_wav = self.mel2wav(gt_spec, pitch, start_frame=start_frame)
                 out_wav *= mask
             else:
-                out_wav = self.vocoder_onnx.infer(out_mel, pitch, silence_front, mask)
+                out_wav = self.vocoder_onnx.infer(gt_spec, pitch, silence_front, mask)
         # print("[ ----Timer::3: ]", t.secs)
 
         return out_wav.squeeze()
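
In plain terms, the hunk above makes the diffusion refinement optional: the naive model's mel estimate (gt_spec) is always computed, the dpm-solver diffusion pass runs only when skip_diffusion is off, and whichever mel is left is what mel2wav / vocoder_onnx.infer receives. A condensed sketch of that control flow, with the Timer blocks and surrounding plumbing trimmed; render_mel is not a real method, and inferencer.naive_model_call / the diffusion __call__ stand in for the methods shown in the diff:

    import torch

    def render_mel(inferencer, feats, pitch, volume, sid, k_step, infer_speedup, skip_diffusion) -> torch.Tensor:
        # cheap first-pass estimate from the naive model
        gt_spec = inferencer.naive_model_call(feats, pitch, volume, spk_id=sid,
                                              spk_mix_dict=None, aug_shift=0, spk_emb=None)
        # expensive diffusion refinement, bypassed entirely when skip_diffusion is on
        if skip_diffusion == 0:
            gt_spec = inferencer(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0,
                                 gt_spec=gt_spec, infer_speedup=infer_speedup,
                                 method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None)
        return gt_spec  # fed to mel2wav / vocoder_onnx.infer afterwards
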
@@ -21,11 +21,16 @@ class Inferencer(Protocol):
 
     def infer(
         self,
         audio_t: torch.Tensor,
         feats: torch.Tensor,
         pitch_length: torch.Tensor,
-        pitch: torch.Tensor | None,
-        pitchf: torch.Tensor | None,
+        pitch: torch.Tensor,
+        volume: torch.Tensor,
+        mask: torch.Tensor,
+        sid: torch.Tensor,
+        k_step: int,
+        infer_speedup: int,
+        silence_front: float,
+        skip_diffusion: bool = True,
     ) -> torch.Tensor:
         ...
 
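
Since Inferencer is a Protocol, concrete inferencers only have to match this structural signature, including the new skip_diffusion flag; callers need no other changes. As an illustration only (SilentInferencer is hypothetical and not part of the repository):

    import torch

    class SilentInferencer:
        # structurally satisfies Inferencer: same infer signature, returns silence
        def infer(
            self,
            audio_t: torch.Tensor,
            feats: torch.Tensor,
            pitch_length: torch.Tensor,
            pitch: torch.Tensor,
            volume: torch.Tensor,
            mask: torch.Tensor,
            sid: torch.Tensor,
            k_step: int,
            infer_speedup: int,
            silence_front: float,
            skip_diffusion: bool = True,
        ) -> torch.Tensor:
            return torch.zeros_like(audio_t)
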