mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 21:45:00 +03:00
WIP: RMVPE
This commit is contained in:
parent
7d7702bb79
commit
3ffacaed97
@ -9,7 +9,6 @@ from Exceptions import (
|
||||
NotEnoughDataExtimateF0,
|
||||
)
|
||||
from voice_changer.DiffusionSVC.inferencer.Inferencer import Inferencer
|
||||
from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.F0Extractor import F0_Extractor
|
||||
from voice_changer.DiffusionSVC.pitchExtractor.PitchExtractor import PitchExtractor
|
||||
|
||||
from voice_changer.RVC.embedder.Embedder import Embedder
|
||||
|
@ -3,6 +3,7 @@ import numpy as np
|
||||
from const import PitchExtractorType
|
||||
from voice_changer.DiffusionSVC.pitchExtractor.PitchExtractor import PitchExtractor
|
||||
from voice_changer.DiffusionSVC.pitchExtractor.rmvpe.rmvpe import RMVPE
|
||||
from scipy.ndimage import zoom
|
||||
|
||||
|
||||
class RMVPEPitchExtractor(PitchExtractor):
|
||||
@ -24,7 +25,9 @@ class RMVPEPitchExtractor(PitchExtractor):
|
||||
def extract(self, audio: torch.Tensor, pitch, f0_up_key, window, silence_front=0):
|
||||
start_frame = int(silence_front * self.sapmle_rate / window)
|
||||
real_silence_front = start_frame * window / self.sapmle_rate
|
||||
|
||||
audio = audio[int(np.round(real_silence_front * self.sapmle_rate)):]
|
||||
silented_frames = int(audio.size(0) // window) + 1
|
||||
|
||||
print("[RMVPE AUDI]", audio.device)
|
||||
print("[RMVPE RMVPE]", self.rmvpe.device)
|
||||
@ -47,6 +50,9 @@ class RMVPEPitchExtractor(PitchExtractor):
|
||||
# pd = torchcrepe.filter.median(pd, 3)
|
||||
# f0[pd < 0.1] = 0
|
||||
# f0 = f0.squeeze()
|
||||
resize_factor = silented_frames / len(f0)
|
||||
f0 = zoom(f0, resize_factor, order=0)
|
||||
|
||||
pitch[-f0.shape[0]:] = f0[:pitch.shape[0]]
|
||||
f0 = pitch
|
||||
|
||||
|
@ -249,7 +249,7 @@ class E2E(nn.Module):
|
||||
)
|
||||
# else:
|
||||
# self.fc = nn.Sequential(
|
||||
# nn.Linear(3 * N_MELS, N_CLASS), nn.Dropout(0.25), nn.Sigmoid()
|
||||
# nn.Linear(3 * N_MELS, N_CLASS), nn.Dropout(0.25), nn.Sigmoid()
|
||||
# )
|
||||
|
||||
def forward(self, mel):
|
||||
@ -392,7 +392,7 @@ class RMVPE:
|
||||
hidden = hidden.astype("float32")
|
||||
f0 = self.decode(hidden, thred=thred)
|
||||
return f0
|
||||
|
||||
|
||||
def to_local_average_cents(self, salience, thred=0.05):
|
||||
# t0 = ttime()
|
||||
center = np.argmax(salience, axis=1) # 帧长#index
|
||||
|
@ -15,4 +15,3 @@ class VoiceChangerParams:
|
||||
crepe_onnx_full: str
|
||||
crepe_onnx_tiny: str
|
||||
rmvpe: str
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user