mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 13:35:12 +03:00
rmvpe
This commit is contained in:
parent
8974bf78d2
commit
78af3b7fff
@ -21,7 +21,7 @@
|
|||||||
{
|
{
|
||||||
"name": "configArea",
|
"name": "configArea",
|
||||||
"options": {
|
"options": {
|
||||||
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe"],
|
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx"],
|
||||||
"inputChunkNums": [8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048]
|
"inputChunkNums": [8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
2
client/demo/dist/index.js
vendored
2
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -21,7 +21,7 @@
|
|||||||
{
|
{
|
||||||
"name": "configArea",
|
"name": "configArea",
|
||||||
"options": {
|
"options": {
|
||||||
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe"],
|
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx"],
|
||||||
"inputChunkNums": [8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048]
|
"inputChunkNums": [8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -47,6 +47,8 @@ export const F0Detector = {
|
|||||||
"crepe": "crepe",
|
"crepe": "crepe",
|
||||||
"crepe_full": "crepe_full",
|
"crepe_full": "crepe_full",
|
||||||
"crepe_tiny": "crepe_tiny",
|
"crepe_tiny": "crepe_tiny",
|
||||||
|
"rmvpe": "rmvpe",
|
||||||
|
"rmvpe_onnx": "rmvpe_onnx",
|
||||||
} as const
|
} as const
|
||||||
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
|
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
|
||||||
|
|
||||||
|
@ -62,6 +62,7 @@ def setupArgParser():
|
|||||||
parser.add_argument("--crepe_onnx_full", type=str, default="pretrain/crepe_onnx_full.onnx", help="path to crepe_onnx_full")
|
parser.add_argument("--crepe_onnx_full", type=str, default="pretrain/crepe_onnx_full.onnx", help="path to crepe_onnx_full")
|
||||||
parser.add_argument("--crepe_onnx_tiny", type=str, default="pretrain/crepe_onnx_tiny.onnx", help="path to crepe_onnx_tiny")
|
parser.add_argument("--crepe_onnx_tiny", type=str, default="pretrain/crepe_onnx_tiny.onnx", help="path to crepe_onnx_tiny")
|
||||||
parser.add_argument("--rmvpe", type=str, default="pretrain/rmvpe.pt", help="path to rmvpe")
|
parser.add_argument("--rmvpe", type=str, default="pretrain/rmvpe.pt", help="path to rmvpe")
|
||||||
|
parser.add_argument("--rmvpe_onnx", type=str, default="pretrain/rmvpe.onnx", help="path to rmvpe onnx")
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
@ -103,6 +104,7 @@ voiceChangerParams = VoiceChangerParams(
|
|||||||
crepe_onnx_full=args.crepe_onnx_full,
|
crepe_onnx_full=args.crepe_onnx_full,
|
||||||
crepe_onnx_tiny=args.crepe_onnx_tiny,
|
crepe_onnx_tiny=args.crepe_onnx_tiny,
|
||||||
rmvpe=args.rmvpe,
|
rmvpe=args.rmvpe,
|
||||||
|
rmvpe_onnx=args.rmvpe_onnx,
|
||||||
sample_mode=args.sample_mode,
|
sample_mode=args.sample_mode,
|
||||||
)
|
)
|
||||||
vcparams = VoiceChangerParamsManager.get_instance()
|
vcparams = VoiceChangerParamsManager.get_instance()
|
||||||
|
@ -79,6 +79,7 @@ PitchExtractorType: TypeAlias = Literal[
|
|||||||
"crepe_full",
|
"crepe_full",
|
||||||
"crepe_tiny",
|
"crepe_tiny",
|
||||||
"rmvpe",
|
"rmvpe",
|
||||||
|
"rmvpe_onnx",
|
||||||
]
|
]
|
||||||
|
|
||||||
ServerAudioDeviceType: TypeAlias = Literal[
|
ServerAudioDeviceType: TypeAlias = Literal[
|
||||||
|
@ -18,6 +18,7 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
|
|||||||
crepe_onnx_full = voiceChangerParams.crepe_onnx_full
|
crepe_onnx_full = voiceChangerParams.crepe_onnx_full
|
||||||
crepe_onnx_tiny = voiceChangerParams.crepe_onnx_tiny
|
crepe_onnx_tiny = voiceChangerParams.crepe_onnx_tiny
|
||||||
rmvpe = voiceChangerParams.rmvpe
|
rmvpe = voiceChangerParams.rmvpe
|
||||||
|
rmvpe_onnx = voiceChangerParams.rmvpe_onnx
|
||||||
|
|
||||||
weight_files = [content_vec_500_onnx, hubert_base, hubert_base_jp, hubert_soft,
|
weight_files = [content_vec_500_onnx, hubert_base, hubert_base_jp, hubert_soft,
|
||||||
nsf_hifigan, crepe_onnx_full, crepe_onnx_tiny, rmvpe]
|
nsf_hifigan, crepe_onnx_full, crepe_onnx_tiny, rmvpe]
|
||||||
@ -109,6 +110,14 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
|
|||||||
"position": 8,
|
"position": 8,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
if os.path.exists(rmvpe_onnx) is False:
|
||||||
|
downloadParams.append(
|
||||||
|
{
|
||||||
|
"url": "https://huggingface.co/wok000/weights_gpl/resolve/main/rmvpe/rmvpe_s.onnx",
|
||||||
|
"saveTo": rmvpe_onnx,
|
||||||
|
"position": 9,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
with ThreadPoolExecutor() as pool:
|
with ThreadPoolExecutor() as pool:
|
||||||
pool.map(download, downloadParams)
|
pool.map(download, downloadParams)
|
||||||
|
@ -6,6 +6,7 @@ from voice_changer.RVC.pitchExtractor.HarvestPitchExtractor import HarvestPitchE
|
|||||||
from voice_changer.RVC.pitchExtractor.CrepePitchExtractor import CrepePitchExtractor
|
from voice_changer.RVC.pitchExtractor.CrepePitchExtractor import CrepePitchExtractor
|
||||||
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
|
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
|
||||||
from voice_changer.RVC.pitchExtractor.RMVPEPitchExtractor import RMVPEPitchExtractor
|
from voice_changer.RVC.pitchExtractor.RMVPEPitchExtractor import RMVPEPitchExtractor
|
||||||
|
from voice_changer.RVC.pitchExtractor.RMVPOnnxEPitchExtractor import RMVPOnnxEPitchExtractor
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
|
|
||||||
|
|
||||||
@ -40,6 +41,8 @@ class PitchExtractorManager(Protocol):
|
|||||||
return CrepeOnnxPitchExtractor(pitchExtractorType, cls.params.crepe_onnx_full, gpu)
|
return CrepeOnnxPitchExtractor(pitchExtractorType, cls.params.crepe_onnx_full, gpu)
|
||||||
elif pitchExtractorType == "rmvpe":
|
elif pitchExtractorType == "rmvpe":
|
||||||
return RMVPEPitchExtractor(cls.params.rmvpe, gpu)
|
return RMVPEPitchExtractor(cls.params.rmvpe, gpu)
|
||||||
|
elif pitchExtractorType == "rmvpe_onnx":
|
||||||
|
return RMVPOnnxEPitchExtractor(cls.params.rmvpe_onnx, gpu)
|
||||||
else:
|
else:
|
||||||
# return hubert as default
|
# return hubert as default
|
||||||
print("[Voice Changer] PitchExctractor not found", pitchExtractorType)
|
print("[Voice Changer] PitchExctractor not found", pitchExtractorType)
|
||||||
|
@ -0,0 +1,76 @@
|
|||||||
|
import numpy as np
|
||||||
|
from const import PitchExtractorType
|
||||||
|
from voice_changer.DiffusionSVC.pitchExtractor.PitchExtractor import PitchExtractor
|
||||||
|
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||||
|
import onnxruntime
|
||||||
|
|
||||||
|
|
||||||
|
class RMVPOnnxEPitchExtractor(PitchExtractor):
    """Pitch (f0) extractor that runs an RMVPE model through ONNX Runtime.

    The ONNX session is created once in ``__init__``; ``extract`` feeds it a
    1-D waveform and converts the returned f0 track into the
    ``(f0_coarse, pitchf)`` pair returned to the caller.
    """

    def __init__(self, file: str, gpu: int):
        """Create the ONNX inference session.

        Args:
            file: Path to the RMVPE ``.onnx`` model file.
            gpu: Device id handed to ``DeviceManager.getOnnxExecutionProvider``
                to choose the execution provider and its options.
        """
        super().__init__()
        self.file = file
        self.pitchExtractorType: PitchExtractorType = "rmvpe_onnx"

        # f0 bounds (presumably Hz -- TODO confirm) and the same bounds on the
        # mel scale; the mel bounds are used to quantise f0 in ``extract``.
        self.f0_min = 50
        self.f0_max = 1100
        self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
        self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)

        (
            onnxProviders,
            onnxProviderOptions,
        ) = DeviceManager.get_instance().getOnnxExecutionProvider(gpu)
        self.onnxProviders = onnxProviders
        self.onnxProviderOptions = onnxProviderOptions

        so = onnxruntime.SessionOptions()
        # Severity 3 == "error" in ONNX Runtime: silences info/warning logs.
        so.log_severity_level = 3
        self.onnx_session = onnxruntime.InferenceSession(
            self.file,
            sess_options=so,
            providers=onnxProviders,
            provider_options=onnxProviderOptions,
        )

    def extract(self, audio, pitchf, f0_up_key, sr, window, silence_front=0):
        """Estimate f0 for ``audio`` and merge it into the tail of ``pitchf``.

        Args:
            audio: 1-D waveform, either a ``numpy.ndarray`` or an object
                exposing ``.cpu().numpy()`` (e.g. a torch tensor).
            pitchf: 1-D f0 buffer of the same kinds. When it is already a
                ``numpy.ndarray`` it is updated in place.
            f0_up_key: Pitch shift in semitones; f0 is scaled by
                ``2 ** (f0_up_key / 12)``.
            sr: Sample rate of ``audio``.
            window: Hop size in samples used to align the skipped leading
                region to a whole number of windows.
            silence_front: Leading span (in seconds, given it is multiplied
                by ``sr``) that is excluded from inference.

        Returns:
            ``(f0_coarse, pitchf)``: the integer-quantised mel pitch clipped
            to 1..255, and the updated f0 buffer as a ``numpy.ndarray``.

        Raises:
            RuntimeError: On any failure, including the dimensionality checks;
                the original exception is passed as the second argument and
                chained as ``__cause__``.
        """
        try:
            # Data conversion: work on numpy arrays from here on.
            if not isinstance(audio, np.ndarray):
                audio = audio.cpu().numpy()

            if not isinstance(pitchf, np.ndarray):
                pitchf = pitchf.cpu().numpy().astype(np.float32)

            if audio.ndim != 1:
                raise RuntimeError(f"Exeption in {self.__class__.__name__} audio.ndim is not 1 (size :{audio.ndim}, {audio.shape})")
            if pitchf.ndim != 1:
                raise RuntimeError(f"Exeption in {self.__class__.__name__} pitchf.ndim is not 1 (size :{pitchf.ndim}, {pitchf.shape})")

            # Processing: drop the leading silence, rounded down to whole windows.
            silenceFrontFrame = silence_front * sr
            startWindow = int(silenceFrontFrame / window)  # truncate the fractional part
            silenceFrontFrameOffset = startWindow * window
            # Both operands are forced to int: a float here would make the
            # slice below raise TypeError (slice indices must be integers).
            minimumFrames = int(0.01 * sr)
            targetFrameLength = max(minimumFrames, int(len(audio) - silenceFrontFrameOffset))
            audio = audio[-targetFrameLength:]
            audio = np.expand_dims(audio, axis=0)

            output = self.onnx_session.run(
                ["f0", "uv"],
                {
                    "waveform": audio.astype(np.float32),
                    "threshold": np.array([0.3]).astype(np.float32),
                },
            )

            # atleast_1d keeps a single-frame result indexable after squeeze().
            f0 = np.atleast_1d(output[0].squeeze())

            f0 *= pow(2, f0_up_key / 12)
            # With zero frames ``pitchf[-0:]`` would address the whole buffer
            # and the assignment would fail to broadcast, so skip it.
            if f0.shape[0] > 0:
                pitchf[-f0.shape[0]:] = f0[: pitchf.shape[0]]

            # Quantise on the mel scale: positive values map linearly into
            # 1..255, everything else is pinned to 1.
            f0_mel = 1127.0 * np.log(1.0 + pitchf / 700.0)
            f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / (self.f0_mel_max - self.f0_mel_min) + 1
            f0_mel[f0_mel <= 1] = 1
            f0_mel[f0_mel > 255] = 255
            f0_coarse = np.rint(f0_mel).astype(int)

        except Exception as e:
            raise RuntimeError(f"Exeption in {self.__class__.__name__}", e) from e

        return f0_coarse, pitchf
|
@ -15,3 +15,4 @@ class VoiceChangerParams:
|
|||||||
crepe_onnx_full: str
|
crepe_onnx_full: str
|
||||||
crepe_onnx_tiny: str
|
crepe_onnx_tiny: str
|
||||||
rmvpe: str
|
rmvpe: str
|
||||||
|
rmvpe_onnx: str
|
||||||
|
Loading…
Reference in New Issue
Block a user