Mirror of https://github.com/w-okada/voice-changer.git, synced 2025-02-02 16:23:58 +03:00
Added CREPE f0 detection
This commit is contained in: commit d62a9fffd4 (parent 11255790c8)
@@ -85,6 +85,7 @@ class EnumInferenceTypes(Enum):
 class EnumPitchExtractorTypes(Enum):
     harvest = "harvest"
     dio = "dio"
+    crepe = "crepe"
 
 
 class EnumFrameworkTypes(Enum):
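For context, a minimal sketch (not part of the commit) of how the new enum member is consumed: the manager hunk further down accepts either the enum member or its raw string value, so both forms below select CREPE.

    from enum import Enum

    class EnumPitchExtractorTypes(Enum):
        harvest = "harvest"
        dio = "dio"
        crepe = "crepe"

    # Either the member or its string value identifies the extractor,
    # which is why the manager compares against both forms.
    requested = "crepe"
    assert requested == EnumPitchExtractorTypes.crepe.value
    assert EnumPitchExtractorTypes(requested) is EnumPitchExtractorTypes.crepe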
@@ -0,0 +1,47 @@
import torchcrepe
import torch
import numpy as np

from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor


class CrepePitchExtractor(PitchExtractor):
    def __init__(self):
        super().__init__()
        # Run CREPE on the GPU when one is available, otherwise fall back to CPU.
        if torch.cuda.is_available():
            self.device = torch.device('cuda:' + str(torch.cuda.current_device()))
        else:
            self.device = torch.device('cpu')

    def extract(self, audio, f0_up_key, sr, window, silence_front=0):
        n_frames = int(len(audio) // window) + 1
        # Skip the leading silence, but keep the cut aligned to the frame grid.
        start_frame = int(silence_front * sr / window)
        real_silence_front = start_frame * window / sr

        silence_front_offset = int(np.round(real_silence_front * sr))
        audio = audio[silence_front_offset:]

        # Pitch search range in Hz, and its image on the mel scale
        # used for the coarse quantization below.
        f0_min = 50
        f0_max = 1100
        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
        f0_mel_max = 1127 * np.log(1 + f0_max / 700)

        f0 = torchcrepe.predict(torch.tensor(audio).unsqueeze(0), sr,
                                hop_length=window, fmin=f0_min, fmax=f0_max,
                                model='tiny', batch_size=256,
                                decoder=torchcrepe.decode.weighted_argmax,
                                device=self.device)
        f0 = f0.squeeze().detach().cpu().numpy()

        # Re-pad the trimmed silence so the contour matches the original frame count.
        f0 = np.pad(f0.astype("float"), (start_frame, n_frames - f0.shape[0] - start_frame))

        # Transpose by f0_up_key semitones, then quantize onto 255 mel-spaced bins.
        f0 *= pow(2, f0_up_key / 12)
        f0bak = f0.copy()
        f0_mel = 1127 * np.log(1 + f0 / 700)
        f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
            f0_mel_max - f0_mel_min
        ) + 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > 255] = 255
        # np.int was removed in NumPy 1.24; use a concrete dtype instead.
        f0_coarse = np.rint(f0_mel).astype(np.int64)

        return f0_coarse, f0bak
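A usage sketch for the new extractor (not from the commit: the 16 kHz sample rate, 160-sample hop, and synthetic input are illustrative assumptions):

    import numpy as np
    from voice_changer.RVC.pitchExtractor.CrepePitchExtractor import CrepePitchExtractor

    sr = 16000           # assumed sample rate; the commit does not fix one
    window = 160         # assumed hop size (10 ms at 16 kHz)
    audio = np.random.randn(sr).astype(np.float32)  # 1 s stand-in signal

    extractor = CrepePitchExtractor()
    f0_coarse, f0 = extractor.extract(audio, f0_up_key=0, sr=sr, window=window)
    # f0 is the transposed pitch contour in Hz; f0_coarse quantizes it onto
    # 255 mel-spaced bins (1..255): e.g. 440 Hz maps to roughly bin 122.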
@@ -2,6 +2,7 @@ from typing import Protocol
 from const import EnumPitchExtractorTypes
 from voice_changer.RVC.pitchExtractor.DioPitchExtractor import DioPitchExtractor
 from voice_changer.RVC.pitchExtractor.HarvestPitchExtractor import HarvestPitchExtractor
+from voice_changer.RVC.pitchExtractor.CrepePitchExtractor import CrepePitchExtractor
 from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
 
 
@@ -29,6 +30,11 @@ class PitchExtractorManager(Protocol):
             or pitchExtractorType == EnumPitchExtractorTypes.dio.value
         ):
             return DioPitchExtractor()
+        elif (
+            pitchExtractorType == EnumPitchExtractorTypes.crepe
+            or pitchExtractorType == EnumPitchExtractorTypes.crepe.value
+        ):
+            return CrepePitchExtractor()
         else:
             # return hubert as default
             raise RuntimeError(
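Finally, a sketch of selecting CREPE through the manager. The factory method name sits outside the visible hunk, so getPitchExtractor below is an assumption about the surrounding class, not something this diff shows:

    from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager

    # Hypothetical entry point; both the enum member and the bare string
    # "crepe" should reach the new CrepePitchExtractor branch above.
    extractor = PitchExtractorManager.getPitchExtractor("crepe")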