WIP: RMVPE

This commit is contained in:
w-okada 2023-07-15 09:34:29 +09:00
parent 7d7702bb79
commit 3ffacaed97
4 changed files with 8 additions and 4 deletions

View File

@ -9,7 +9,6 @@ from Exceptions import (
NotEnoughDataExtimateF0,
)
from voice_changer.DiffusionSVC.inferencer.Inferencer import Inferencer
from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.F0Extractor import F0_Extractor
from voice_changer.DiffusionSVC.pitchExtractor.PitchExtractor import PitchExtractor
from voice_changer.RVC.embedder.Embedder import Embedder

View File

@ -3,6 +3,7 @@ import numpy as np
from const import PitchExtractorType
from voice_changer.DiffusionSVC.pitchExtractor.PitchExtractor import PitchExtractor
from voice_changer.DiffusionSVC.pitchExtractor.rmvpe.rmvpe import RMVPE
from scipy.ndimage import zoom
class RMVPEPitchExtractor(PitchExtractor):
@ -24,7 +25,9 @@ class RMVPEPitchExtractor(PitchExtractor):
def extract(self, audio: torch.Tensor, pitch, f0_up_key, window, silence_front=0):
start_frame = int(silence_front * self.sapmle_rate / window)
real_silence_front = start_frame * window / self.sapmle_rate
audio = audio[int(np.round(real_silence_front * self.sapmle_rate)):]
silented_frames = int(audio.size(0) // window) + 1
print("[RMVPE AUDI]", audio.device)
print("[RMVPE RMVPE]", self.rmvpe.device)
@ -47,6 +50,9 @@ class RMVPEPitchExtractor(PitchExtractor):
# pd = torchcrepe.filter.median(pd, 3)
# f0[pd < 0.1] = 0
# f0 = f0.squeeze()
resize_factor = silented_frames / len(f0)
f0 = zoom(f0, resize_factor, order=0)
pitch[-f0.shape[0]:] = f0[:pitch.shape[0]]
f0 = pitch

View File

@ -249,7 +249,7 @@ class E2E(nn.Module):
)
# else:
# self.fc = nn.Sequential(
# nn.Linear(3 * N_MELS, N_CLASS), nn.Dropout(0.25), nn.Sigmoid()
# nn.Linear(3 * N_MELS, N_CLASS), nn.Dropout(0.25), nn.Sigmoid()
# )
def forward(self, mel):
@ -392,7 +392,7 @@ class RMVPE:
hidden = hidden.astype("float32")
f0 = self.decode(hidden, thred=thred)
return f0
def to_local_average_cents(self, salience, thred=0.05):
# t0 = ttime()
center = np.argmax(salience, axis=1) # length = number of frames; values are indices

View File

@ -15,4 +15,3 @@ class VoiceChangerParams:
crepe_onnx_full: str
crepe_onnx_tiny: str
rmvpe: str