Add chihaya_jinja_sample

Web Edition improvement (16k test)

bugfix:
- merge slot
- server mode append error
This commit is contained in:
w-okada 2023-11-29 00:30:52 +09:00
parent b24c781a72
commit 17597fdaab
10 changed files with 81 additions and 81 deletions

3
.gitignore vendored
View File

@ -58,6 +58,9 @@ server/samples_0003_o.json
server/samples_0003_t2.json server/samples_0003_t2.json
server/samples_0003_o2.json server/samples_0003_o2.json
server/samples_0003_d2.json server/samples_0003_d2.json
server/samples_0004_t.json
server/samples_0004_o.json
server/samples_0004_d.json
server/test_official_v1_v2.json server/test_official_v1_v2.json
server/test_ddpn_v1_v2.json server/test_ddpn_v1_v2.json

View File

@ -45,6 +45,7 @@ export type WebInfoStateAndMethod = WebInfoState & {
const ModelSampleRateStr = { const ModelSampleRateStr = {
"40k": "40k", "40k": "40k",
"32k": "32k", "32k": "32k",
"16k": "16k",
} as const; } as const;
type ModelSampleRateStr = (typeof ModelSampleRateStr)[keyof typeof ModelSampleRateStr]; type ModelSampleRateStr = (typeof ModelSampleRateStr)[keyof typeof ModelSampleRateStr];
@ -71,18 +72,22 @@ const noF0ModelUrl: { [modelType in VoiceChangerType]: { [inputLength in InputLe
"24000": { "24000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_24000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_24000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_24000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_24000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_nof0_24000.bin",
}, },
"16000": { "16000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_16000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_16000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_16000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_16000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_nof0_16000.bin",
}, },
"12000": { "12000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_12000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_12000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_12000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_12000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_nof0_12000.bin",
}, },
"8000": { "8000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_8000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_nof0_8000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_8000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_nof0_8000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_nof0_8000.bin",
}, },
}, },
}; };
@ -109,18 +114,22 @@ const f0ModelUrl: { [modelType in VoiceChangerType]: { [inputLength in InputLeng
"24000": { "24000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_24000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_24000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_24000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_24000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_24000.bin",
}, },
"16000": { "16000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_16000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_16000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_16000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_16000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_16000.bin",
}, },
"12000": { "12000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_12000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_12000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_12000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_12000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_12000.bin",
}, },
"8000": { "8000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_8000.bin", "40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_8000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_8000.bin", "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_8000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_8000.bin",
}, },
}, },
}; };

View File

@ -50,7 +50,7 @@ export const Portrait = (_props: PortraitProps) => {
} }
vol.innerText = volume.toFixed(4); vol.innerText = volume.toFixed(4);
if (webEdition) { if (webEdition) {
buf.innerText = webInfoState.responseTimeInfo.realDuration.toString() ?? "0"; buf.innerText = bufferingTime.toString();
res.innerText = webInfoState.responseTimeInfo.responseTime.toString() ?? "0"; res.innerText = webInfoState.responseTimeInfo.responseTime.toString() ?? "0";
rtf.innerText = webInfoState.responseTimeInfo.rtf.toString() ?? "0"; rtf.innerText = webInfoState.responseTimeInfo.rtf.toString() ?? "0";
} else { } else {

View File

@ -55,6 +55,7 @@ export const WebEditionSettingArea = (_props: WebEditionSettingAreaProps) => {
</div> </div>
); );
const sr16KClassName = "character-area-control-button" + (webInfoState.voiceChangerConfig.sampleRate == "16k" ? " character-area-control-button-active" : " character-area-control-button-stanby");
const sr32KClassName = "character-area-control-button" + (webInfoState.voiceChangerConfig.sampleRate == "32k" ? " character-area-control-button-active" : " character-area-control-button-stanby"); const sr32KClassName = "character-area-control-button" + (webInfoState.voiceChangerConfig.sampleRate == "32k" ? " character-area-control-button-active" : " character-area-control-button-stanby");
const sr40KClassName = "character-area-control-button" + (webInfoState.voiceChangerConfig.sampleRate == "40k" ? " character-area-control-button-active" : " character-area-control-button-stanby"); const sr40KClassName = "character-area-control-button" + (webInfoState.voiceChangerConfig.sampleRate == "40k" ? " character-area-control-button-active" : " character-area-control-button-stanby");
const sampleRate = ( const sampleRate = (
@ -64,6 +65,15 @@ export const WebEditionSettingArea = (_props: WebEditionSettingAreaProps) => {
<div className="character-area-slider-control"> <div className="character-area-slider-control">
<span className="character-area-slider-control-kind"></span> <span className="character-area-slider-control-kind"></span>
<span className="character-area-control-buttons"> <span className="character-area-control-buttons">
<span
className={!readyForConfig ? "character-area-control-button-disable" : sr16KClassName}
onClick={() => {
if (webInfoState.voiceChangerConfig.sampleRate == "16k" || !readyForConfig) return;
webInfoState.setVoiceChangerConfig("rvcv2", "16k", webInfoState.voiceChangerConfig.useF0, webInfoState.voiceChangerConfig.inputLength);
}}
>
16k
</span>
<span <span
className={!readyForConfig ? "character-area-control-button-disable" : sr32KClassName} className={!readyForConfig ? "character-area-control-button-disable" : sr32KClassName}
onClick={() => { onClick={() => {

View File

@ -98,11 +98,9 @@ RVCSampleMode: TypeAlias = Literal[
def getSampleJsonAndModelIds(mode: RVCSampleMode): def getSampleJsonAndModelIds(mode: RVCSampleMode):
if mode == "production": if mode == "production":
return [ return [
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json", "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0004_t.json",
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json", "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0004_o.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0004_d.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_d2.json",
], [ ], [
("Tsukuyomi-chan_o", {"useIndex": False}), ("Tsukuyomi-chan_o", {"useIndex": False}),
("Amitaro_o", {"useIndex": False}), ("Amitaro_o", {"useIndex": False}),

View File

@ -7,7 +7,7 @@ from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.diffusion.vocoder
from voice_changer.DiffusionSVC.inferencer.onnx.VocoderOnnx import VocoderOnnx from voice_changer.DiffusionSVC.inferencer.onnx.VocoderOnnx import VocoderOnnx
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.utils.Timer import Timer from voice_changer.utils.Timer import Timer2
class DiffusionSVCInferencer(Inferencer): class DiffusionSVCInferencer(Inferencer):
@ -49,18 +49,14 @@ class DiffusionSVCInferencer(Inferencer):
return model_block_size, model_sampling_rate return model_block_size, model_sampling_rate
@torch.no_grad() # 最基本推理代码,将输入标准化为tensor,只与mel打交道 @torch.no_grad() # 最基本推理代码,将输入标准化为tensor,只与mel打交道
def __call__(self, units, f0, volume, spk_id=1, spk_mix_dict=None, aug_shift=0, def __call__(self, units, f0, volume, spk_id=1, spk_mix_dict=None, aug_shift=0, gt_spec=None, infer_speedup=10, method="dpm-solver", k_step=None, use_tqdm=True, spk_emb=None):
gt_spec=None, infer_speedup=10, method='dpm-solver', k_step=None, use_tqdm=True,
spk_emb=None):
if self.diff_args.model.k_step_max is not None: if self.diff_args.model.k_step_max is not None:
if k_step is None: if k_step is None:
raise ValueError("k_step must not None when Shallow Diffusion Model inferring") raise ValueError("k_step must not None when Shallow Diffusion Model inferring")
if k_step > int(self.diff_args.model.k_step_max): if k_step > int(self.diff_args.model.k_step_max):
raise ValueError("k_step must <= k_step_max of Shallow Diffusion Model") raise ValueError("k_step must <= k_step_max of Shallow Diffusion Model")
if gt_spec is None: if gt_spec is None:
raise ValueError("gt_spec must not None when Shallow Diffusion Model inferring, gt_spec can from " raise ValueError("gt_spec must not None when Shallow Diffusion Model inferring, gt_spec can from " "input mel or output of naive model")
"input mel or output of naive model")
aug_shift = torch.from_numpy(np.array([[float(aug_shift)]])).float().to(self.dev) aug_shift = torch.from_numpy(np.array([[float(aug_shift)]])).float().to(self.dev)
@ -75,8 +71,7 @@ class DiffusionSVCInferencer(Inferencer):
return self.diff_model(units, f0, volume, spk_id=spk_id, spk_mix_dict=spk_mix_dict, aug_shift=aug_shift, gt_spec=gt_spec, infer=True, infer_speedup=infer_speedup, method=method, k_step=k_step, use_tqdm=use_tqdm, spk_emb=spk_emb, spk_emb_dict=spk_emb_dict) return self.diff_model(units, f0, volume, spk_id=spk_id, spk_mix_dict=spk_mix_dict, aug_shift=aug_shift, gt_spec=gt_spec, infer=True, infer_speedup=infer_speedup, method=method, k_step=k_step, use_tqdm=use_tqdm, spk_emb=spk_emb, spk_emb_dict=spk_emb_dict)
@torch.no_grad() @torch.no_grad()
def naive_model_call(self, units, f0, volume, spk_id=1, spk_mix_dict=None, def naive_model_call(self, units, f0, volume, spk_id=1, spk_mix_dict=None, aug_shift=0, spk_emb=None):
aug_shift=0, spk_emb=None):
# spk_id # spk_id
spk_emb_dict = None spk_emb_dict = None
if self.diff_args.model.use_speaker_encoder: # with speaker encoder if self.diff_args.model.use_speaker_encoder: # with speaker encoder
@ -85,9 +80,7 @@ class DiffusionSVCInferencer(Inferencer):
else: else:
spk_id = torch.LongTensor(np.array([[int(spk_id)]])).to(self.dev) spk_id = torch.LongTensor(np.array([[int(spk_id)]])).to(self.dev)
aug_shift = torch.from_numpy(np.array([[float(aug_shift)]])).float().to(self.dev) aug_shift = torch.from_numpy(np.array([[float(aug_shift)]])).float().to(self.dev)
out_spec = self.naive_model(units, f0, volume, spk_id=spk_id, spk_mix_dict=spk_mix_dict, out_spec = self.naive_model(units, f0, volume, spk_id=spk_id, spk_mix_dict=spk_mix_dict, aug_shift=aug_shift, infer=True, spk_emb=spk_emb, spk_emb_dict=spk_emb_dict)
aug_shift=aug_shift, infer=True,
spk_emb=spk_emb, spk_emb_dict=spk_emb_dict)
return out_spec return out_spec
@torch.no_grad() @torch.no_grad()
@ -114,19 +107,18 @@ class DiffusionSVCInferencer(Inferencer):
silence_front: float, silence_front: float,
skip_diffusion: bool = True, skip_diffusion: bool = True,
) -> torch.Tensor: ) -> torch.Tensor:
with Timer("pre-process", False) as t: with Timer2("pre-process", False) as t:
gt_spec = self.naive_model_call(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, spk_emb=None) gt_spec = self.naive_model_call(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, spk_emb=None)
# print("[ ----Timer::1: ]", t.secs) # print("[ ----Timer::1: ]", t.secs)
with Timer("pre-process", False) as t: with Timer2("pre-process", False) as t:
if skip_diffusion == 0: if skip_diffusion == 0:
out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None) out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method="dpm-solver", k_step=k_step, use_tqdm=False, spk_emb=None)
gt_spec = out_mel gt_spec = out_mel
# print("[ ----Timer::2: ]", t.secs) # print("[ ----Timer::2: ]", t.secs)
with Timer2("pre-process", False) as t: # NOQA
with Timer("pre-process", False) as t: # NOQA
if self.vocoder_onnx is None: if self.vocoder_onnx is None:
start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size) start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size)
out_wav = self.mel2wav(gt_spec, pitch, start_frame=start_frame) out_wav = self.mel2wav(gt_spec, pitch, start_frame=start_frame)

View File

@ -17,7 +17,7 @@ from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.common.VolumeExtractor import VolumeExtractor from voice_changer.common.VolumeExtractor import VolumeExtractor
from torchaudio.transforms import Resample from torchaudio.transforms import Resample
from voice_changer.utils.Timer import Timer from voice_changer.utils.Timer import Timer2
logger = VoiceChangaerLogger.get_instance().getLogger() logger = VoiceChangaerLogger.get_instance().getLogger()
@ -45,7 +45,7 @@ class Pipeline(object):
device, device,
isHalf, isHalf,
resamplerIn: Resample, resamplerIn: Resample,
resamplerOut: Resample resamplerOut: Resample,
): ):
self.inferencer = inferencer self.inferencer = inferencer
inferencer_block_size, inferencer_sampling_rate = inferencer.getConfig() inferencer_block_size, inferencer_sampling_rate = inferencer.getConfig()
@ -64,7 +64,7 @@ class Pipeline(object):
logger.info("GENERATE INFERENCER" + str(self.inferencer)) logger.info("GENERATE INFERENCER" + str(self.inferencer))
logger.info("GENERATE EMBEDDER" + str(self.embedder)) logger.info("GENERATE EMBEDDER" + str(self.embedder))
logger.info("GENERATE PITCH EXTRACTOR" + str(self.pitchExtractor)) logger.info("GENERATE PITCH EXTRACTOR" + str(self.pitchExtractor))
self.targetSR = targetSR self.targetSR = targetSR
self.device = device self.device = device
self.isHalf = False self.isHalf = False
@ -103,7 +103,7 @@ class Pipeline(object):
skip_diffusion=True, skip_diffusion=True,
): ):
# print("---------- pipe line --------------------") # print("---------- pipe line --------------------")
with Timer("pre-process", False) as t: with Timer2("pre-process", False) as t:
audio_t = torch.from_numpy(audio).float().unsqueeze(0).to(self.device) audio_t = torch.from_numpy(audio).float().unsqueeze(0).to(self.device)
audio16k = self.resamplerIn(audio_t) audio16k = self.resamplerIn(audio_t)
volume, mask = self.extract_volume_and_mask(audio16k, threshold=-60.0) volume, mask = self.extract_volume_and_mask(audio16k, threshold=-60.0)
@ -111,7 +111,7 @@ class Pipeline(object):
n_frames = int(audio16k.size(-1) // self.hop_size + 1) n_frames = int(audio16k.size(-1) // self.hop_size + 1)
# print("[Timer::1: ]", t.secs) # print("[Timer::1: ]", t.secs)
with Timer("pre-process", False) as t: with Timer2("pre-process", False) as t:
# ピッチ検出 # ピッチ検出
try: try:
# pitch = self.pitchExtractor.extract( # pitch = self.pitchExtractor.extract(
@ -141,8 +141,7 @@ class Pipeline(object):
feats = feats.view(1, -1) feats = feats.view(1, -1)
# print("[Timer::2: ]", t.secs) # print("[Timer::2: ]", t.secs)
with Timer("pre-process", False) as t: with Timer2("pre-process", False) as t:
# embedding # embedding
with autocast(enabled=self.isHalf): with autocast(enabled=self.isHalf):
try: try:
@ -156,28 +155,17 @@ class Pipeline(object):
raise DeviceChangingException() raise DeviceChangingException()
else: else:
raise e raise e
feats = F.interpolate(feats.permute(0, 2, 1), size=int(n_frames), mode='nearest').permute(0, 2, 1) feats = F.interpolate(feats.permute(0, 2, 1), size=int(n_frames), mode="nearest").permute(0, 2, 1)
# print("[Timer::3: ]", t.secs) # print("[Timer::3: ]", t.secs)
with Timer("pre-process", False) as t: with Timer2("pre-process", False) as t:
# 推論実行 # 推論実行
try: try:
with torch.no_grad(): with torch.no_grad():
with autocast(enabled=self.isHalf): with autocast(enabled=self.isHalf):
audio1 = ( audio1 = (
torch.clip( torch.clip(
self.inferencer.infer( self.inferencer.infer(audio16k, feats, pitch.unsqueeze(-1), volume, mask, sid, k_step, infer_speedup, silence_front=silence_front, skip_diffusion=skip_diffusion).to(dtype=torch.float32),
audio16k,
feats,
pitch.unsqueeze(-1),
volume,
mask,
sid,
k_step,
infer_speedup,
silence_front=silence_front,
skip_diffusion=skip_diffusion
).to(dtype=torch.float32),
-1.0, -1.0,
1.0, 1.0,
) )
@ -191,7 +179,7 @@ class Pipeline(object):
raise e raise e
# print("[Timer::4: ]", t.secs) # print("[Timer::4: ]", t.secs)
with Timer("pre-process", False) as t: # NOQA with Timer2("pre-process", False) as t: # NOQA
feats_buffer = feats.squeeze(0).detach().cpu() feats_buffer = feats.squeeze(0).detach().cpu()
if pitch is not None: if pitch is not None:
pitch_buffer = pitch.squeeze(0).detach().cpu() pitch_buffer = pitch.squeeze(0).detach().cpu()

View File

@ -9,7 +9,7 @@ from mods.log_control import VoiceChangaerLogger
from voice_changer.Local.AudioDeviceList import checkSamplingRate, list_audio_device from voice_changer.Local.AudioDeviceList import checkSamplingRate, list_audio_device
import time import time
import sounddevice as sd import sounddevice as sd
from voice_changer.utils.Timer import Timer from voice_changer.utils.Timer import Timer2
import librosa import librosa
from voice_changer.utils.VoiceChangerModel import AudioInOut from voice_changer.utils.VoiceChangerModel import AudioInOut
@ -139,7 +139,7 @@ class ServerDevice:
return out_wav, times return out_wav, times
def _processDataWithTime(self, indata: np.ndarray): def _processDataWithTime(self, indata: np.ndarray):
with Timer("all_inference_time") as t: with Timer2("all_inference_time", False) as t:
out_wav, times = self._processData(indata) out_wav, times = self._processData(indata)
all_inference_time = t.secs all_inference_time = t.secs
self.performance = [all_inference_time] + times self.performance = [all_inference_time] + times

View File

@ -364,7 +364,7 @@ class VoiceChangerManager(ServerDeviceCallbacks):
req = json.loads(request) req = json.loads(request)
req = ModelMergerRequest(**req) req = ModelMergerRequest(**req)
req.files = [MergeElement(**f) for f in req.files] req.files = [MergeElement(**f) for f in req.files]
slot = len(self.modelSlotManager.getAllSlotInfo()) - 1 slot = len(self.modelSlotManager.getAllSlotInfo()) - 2 # Beatrice-JVS が追加されたので -1 -> -2
if req.voiceChangerType == "RVC": if req.voiceChangerType == "RVC":
merged = RVCModelMerger.merge_models(self.params, req, slot) merged = RVCModelMerger.merge_models(self.params, req, slot)
loadParam = LoadModelParams(voiceChangerType="RVC", slot=slot, isSampleMode=False, sampleId="", files=[LoadModelParamFile(name=os.path.basename(merged), kind="rvcModel", dir="")], params={}) loadParam = LoadModelParams(voiceChangerType="RVC", slot=slot, isSampleMode=False, sampleId="", files=[LoadModelParamFile(name=os.path.basename(merged), kind="rvcModel", dir="")], params={})

View File

@ -3,45 +3,45 @@ import inspect
from typing import Dict, List from typing import Dict, List
class Timer(object): # class Timer(object):
storedSecs: Dict[str, Dict[str, List[float]]] = {} # Class variable # storedSecs: Dict[str, Dict[str, List[float]]] = {} # Class variable
def __init__(self, title: str, enalbe: bool = True): # def __init__(self, title: str, enalbe: bool = True):
self.title = title # self.title = title
self.enable = enalbe # self.enable = enalbe
self.secs = 0 # self.secs = 0
self.msecs = 0 # self.msecs = 0
self.avrSecs = 0 # self.avrSecs = 0
if self.enable is False: # if self.enable is False:
return # return
self.maxStores = 10 # self.maxStores = 10
current_frame = inspect.currentframe() # current_frame = inspect.currentframe()
caller_frame = inspect.getouterframes(current_frame, 2) # caller_frame = inspect.getouterframes(current_frame, 2)
frame = caller_frame[1] # frame = caller_frame[1]
filename = frame.filename # filename = frame.filename
line_number = frame.lineno # line_number = frame.lineno
self.key = f"{title}_{filename}_{line_number}" # self.key = f"{title}_{filename}_{line_number}"
if self.key not in self.storedSecs: # if self.key not in self.storedSecs:
self.storedSecs[self.key] = {} # self.storedSecs[self.key] = {}
def __enter__(self): # def __enter__(self):
if self.enable is False: # if self.enable is False:
return # return
self.start = time.time() # self.start = time.time()
return self # return self
def __exit__(self, *_): # def __exit__(self, *_):
if self.enable is False: # if self.enable is False:
return # return
self.end = time.time() # self.end = time.time()
self.secs = self.end - self.start # self.secs = self.end - self.start
self.msecs = self.secs * 1000 # millisecs # self.msecs = self.secs * 1000 # millisecs
self.storedSecs[self.key].append(self.secs) # self.storedSecs[self.key].append(self.secs)
self.storedSecs[self.key] = self.storedSecs[self.key][-self.maxStores :] # self.storedSecs[self.key] = self.storedSecs[self.key][-self.maxStores :]
self.avrSecs = sum(self.storedSecs[self.key]) / len(self.storedSecs[self.key]) # self.avrSecs = sum(self.storedSecs[self.key]) / len(self.storedSecs[self.key])
class Timer2(object): class Timer2(object):