diff --git a/server/voice_changer/RVC/pipeline/Pipeline.py b/server/voice_changer/RVC/pipeline/Pipeline.py
index 0de9c6df..c9ab5ad5 100644
--- a/server/voice_changer/RVC/pipeline/Pipeline.py
+++ b/server/voice_changer/RVC/pipeline/Pipeline.py
@@ -189,6 +189,12 @@ class Pipeline(object):
             pitch = pitch[:, :p_len]
             pitchf = pitchf[:, :p_len]
 
+        feats_len = feats.shape[1]
+        if pitch is not None and pitchf is not None:
+            pitch = pitch[:, -feats_len:]
+            pitchf = pitchf[:, -feats_len:]
+        p_len = torch.tensor([feats_len], device=self.device).long()
+
         # pitchの推定が上手くいかない(pitchf=0)場合、検索前の特徴を混ぜる
         # pitchffの作り方の疑問はあるが、本家通りなので、このまま使うことにする。
         # https://github.com/w-okada/voice-changer/pull/276#issuecomment-1571336929
@@ -201,23 +207,11 @@ class Pipeline(object):
         feats = feats.to(feats0.dtype)
         p_len = torch.tensor([p_len], device=self.device).long()
 
-        feats_buffer = feats.squeeze(0).detach().cpu()
-        if pitchf is not None:
-            pitchf_buffer = pitchf.squeeze(0).detach().cpu()
-        else:
-            pitchf_buffer = None
 
         # apply silent front for inference
         if type(self.inferencer) in [OnnxRVCInferencer, OnnxRVCInferencerNono]:
             npyOffset = math.floor(silence_front * 16000) // 360
             feats = feats[:, npyOffset * 2 :, :]
-
-        feats_len = feats.shape[1]
-        if pitch is not None and pitchf is not None:
-            pitch = pitch[:, -feats_len:]
-            pitchf = pitchf[:, -feats_len:]
-        p_len = torch.tensor([feats_len], device=self.device).long()
-
         # 推論実行
         try:
             with torch.no_grad():
@@ -237,6 +231,12 @@ class Pipeline(object):
             else:
                 raise e
 
+        feats_buffer = feats.squeeze(0).detach().cpu()
+        if pitchf is not None:
+            pitchf_buffer = pitchf.squeeze(0).detach().cpu()
+        else:
+            pitchf_buffer = None
+
         del p_len, padding_mask, pitch, pitchf, feats
         torch.cuda.empty_cache()
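
The patch is purely a reordering: the `feats_len` alignment of `pitch`/`pitchf` now runs before the feature-mixing step instead of right before inference, and the `feats_buffer`/`pitchf_buffer` snapshots are taken only after inference has completed. The sketch below is a minimal, hypothetical illustration of that ordering, not the project's actual `Pipeline.exec`; the `infer` callable, the free function, and the tensor shapes are assumptions for illustration only.

```python
from typing import Callable, Optional, Tuple

import torch


def run_inference(
    feats: torch.Tensor,                 # assumed shape (1, T, C)
    pitch: Optional[torch.Tensor],       # assumed shape (1, T') or None
    pitchf: Optional[torch.Tensor],      # assumed shape (1, T') or None
    infer: Callable[..., torch.Tensor],  # hypothetical inference callable
    device: torch.device = torch.device("cpu"),
) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
    # 1) Align pitch/pitchf to the feature length and derive p_len from it,
    #    before anything else indexes into these tensors.
    feats_len = feats.shape[1]
    if pitch is not None and pitchf is not None:
        pitch = pitch[:, -feats_len:]
        pitchf = pitchf[:, -feats_len:]
    p_len = torch.tensor([feats_len], device=device).long()

    # 2) Run inference with the aligned tensors.
    with torch.no_grad():
        audio_out = infer(feats, p_len, pitch, pitchf)

    # 3) Only after inference has succeeded, snapshot CPU-side copies of the
    #    feature and pitch tensors for reuse on the next chunk.
    feats_buffer = feats.squeeze(0).detach().cpu()
    pitchf_buffer = pitchf.squeeze(0).detach().cpu() if pitchf is not None else None

    return audio_out, feats_buffer, pitchf_buffer
```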