mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 21:45:00 +03:00
WIP: Japanese Hubert
This commit is contained in:
parent
6a01467ac8
commit
bfb2de9ea1
@ -1,5 +1,6 @@
|
||||
import sys
|
||||
import os
|
||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||
|
||||
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
|
||||
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
|
||||
@ -59,6 +60,7 @@ class RVC:
|
||||
embedder: Embedder | None = None
|
||||
inferencer: Inferencer | None = None
|
||||
pitchExtractor: PitchExtractor | None = None
|
||||
deviceManager = DeviceManager.get_instance()
|
||||
|
||||
def __init__(self, params: VoiceChangerParams):
|
||||
self.initialLoad = True
|
||||
@ -70,17 +72,11 @@ class RVC:
|
||||
self.feature_file = None
|
||||
self.index_file = None
|
||||
|
||||
self.gpu_num = torch.cuda.device_count()
|
||||
self.prevVol = 0
|
||||
self.params = params
|
||||
|
||||
self.mps_enabled: bool = (
|
||||
getattr(torch.backends, "mps", None) is not None
|
||||
and torch.backends.mps.is_available()
|
||||
)
|
||||
self.currentSlot = -1
|
||||
print("RVC initialization: ", params)
|
||||
print("mps: ", self.mps_enabled)
|
||||
|
||||
def loadModel(self, props: LoadModelParams):
|
||||
"""
|
||||
@ -109,14 +105,14 @@ class RVC:
|
||||
|
||||
return self.get_info()
|
||||
|
||||
def _getDevice(self):
|
||||
if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled is False):
|
||||
dev = torch.device("cpu")
|
||||
elif self.mps_enabled:
|
||||
dev = torch.device("mps")
|
||||
else:
|
||||
dev = torch.device("cuda", index=self.settings.gpu)
|
||||
return dev
|
||||
# def _getDevice(self):
|
||||
# if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled is False):
|
||||
# dev = torch.device("cpu")
|
||||
# elif self.mps_enabled:
|
||||
# dev = torch.device("mps")
|
||||
# else:
|
||||
# dev = torch.device("cuda", index=self.settings.gpu)
|
||||
# return dev
|
||||
|
||||
def prepareModel(self, slot: int):
|
||||
if slot < 0:
|
||||
@ -126,7 +122,7 @@ class RVC:
|
||||
filename = (
|
||||
modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile
|
||||
)
|
||||
dev = self._getDevice()
|
||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
||||
|
||||
# Inferencerのロード
|
||||
inferencer = InferencerManager.getInferencer(
|
||||
@ -166,12 +162,7 @@ class RVC:
|
||||
|
||||
def switchModel(self):
|
||||
print("[Voice Changer] Switching model..")
|
||||
if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled is False):
|
||||
dev = torch.device("cpu")
|
||||
elif self.mps_enabled:
|
||||
dev = torch.device("mps")
|
||||
else:
|
||||
dev = torch.device("cuda", index=self.settings.gpu)
|
||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
||||
|
||||
# embedderはモデルによらず再利用できる可能性が高いので、Switchのタイミングでこちらで取得
|
||||
try:
|
||||
@ -179,7 +170,7 @@ class RVC:
|
||||
self.next_embedder,
|
||||
self.params.hubert_base,
|
||||
True,
|
||||
torch.device("cuda:0"),
|
||||
dev,
|
||||
)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] load hubert error", e)
|
||||
@ -372,19 +363,10 @@ class RVC:
|
||||
# )
|
||||
# raise NoModeLoadedException("pytorch")
|
||||
|
||||
if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled is False):
|
||||
dev = torch.device("cpu")
|
||||
elif self.mps_enabled:
|
||||
dev = torch.device("mps")
|
||||
else:
|
||||
dev = torch.device("cuda", index=self.settings.gpu)
|
||||
|
||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
||||
self.embedder = self.embedder.to(dev)
|
||||
self.inferencer = self.inferencer.to(dev)
|
||||
|
||||
# self.embedder.printDevice()
|
||||
# self.inferencer.printDevice()
|
||||
|
||||
audio = data[0]
|
||||
convertSize = data[1]
|
||||
vol = data[2]
|
||||
@ -394,35 +376,34 @@ class RVC:
|
||||
if vol < self.settings.silentThreshold:
|
||||
return np.zeros(convertSize).astype(np.int16)
|
||||
|
||||
with torch.no_grad():
|
||||
repeat = 3 if self.is_half else 1
|
||||
repeat *= self.settings.rvcQuality # 0 or 3
|
||||
vc = VC(self.settings.modelSamplingRate, dev, self.is_half, repeat)
|
||||
sid = 0
|
||||
f0_up_key = self.settings.tran
|
||||
f0_method = self.settings.f0Detector
|
||||
index_rate = self.settings.indexRatio
|
||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
||||
repeat = 3 if self.is_half else 1
|
||||
repeat *= self.settings.rvcQuality # 0 or 3
|
||||
vc = VC(self.settings.modelSamplingRate, dev, self.is_half, repeat)
|
||||
sid = 0
|
||||
f0_up_key = self.settings.tran
|
||||
f0_method = self.settings.f0Detector
|
||||
index_rate = self.settings.indexRatio
|
||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
||||
|
||||
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
|
||||
audio_out = vc.pipeline(
|
||||
self.embedder,
|
||||
self.inferencer,
|
||||
self.pitchExtractor,
|
||||
sid,
|
||||
audio,
|
||||
f0_up_key,
|
||||
f0_method,
|
||||
self.index,
|
||||
self.feature,
|
||||
index_rate,
|
||||
if_f0,
|
||||
silence_front=self.settings.extraConvertSize
|
||||
/ self.settings.modelSamplingRate,
|
||||
embChannels=embChannels,
|
||||
)
|
||||
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
|
||||
audio_out = vc.pipeline(
|
||||
self.embedder,
|
||||
self.inferencer,
|
||||
self.pitchExtractor,
|
||||
sid,
|
||||
audio,
|
||||
f0_up_key,
|
||||
f0_method,
|
||||
self.index,
|
||||
self.feature,
|
||||
index_rate,
|
||||
if_f0,
|
||||
silence_front=self.settings.extraConvertSize
|
||||
/ self.settings.modelSamplingRate,
|
||||
embChannels=embChannels,
|
||||
)
|
||||
|
||||
result = audio_out * np.sqrt(vol)
|
||||
result = audio_out * np.sqrt(vol)
|
||||
|
||||
return result
|
||||
|
||||
@ -440,12 +421,54 @@ class RVC:
|
||||
self.currentSlot = self.settings.modelSlotIndex
|
||||
self.switchModel()
|
||||
|
||||
if self.settings.framework == "ONNX":
|
||||
audio = self._onnx_inference(data)
|
||||
else:
|
||||
audio = self._pyTorch_inference(data)
|
||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
||||
self.embedder = self.embedder.to(dev)
|
||||
self.inferencer = self.inferencer.to(dev)
|
||||
|
||||
return audio
|
||||
audio = data[0]
|
||||
convertSize = data[1]
|
||||
vol = data[2]
|
||||
|
||||
audio = resampy.resample(audio, self.settings.modelSamplingRate, 16000)
|
||||
|
||||
if vol < self.settings.silentThreshold:
|
||||
return np.zeros(convertSize).astype(np.int16)
|
||||
|
||||
repeat = 3 if self.is_half else 1
|
||||
repeat *= self.settings.rvcQuality # 0 or 3
|
||||
vc = VC(self.settings.modelSamplingRate, dev, self.is_half, repeat)
|
||||
sid = 0
|
||||
f0_up_key = self.settings.tran
|
||||
f0_method = self.settings.f0Detector
|
||||
index_rate = self.settings.indexRatio
|
||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
||||
|
||||
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
|
||||
audio_out = vc.pipeline(
|
||||
self.embedder,
|
||||
self.inferencer,
|
||||
self.pitchExtractor,
|
||||
sid,
|
||||
audio,
|
||||
f0_up_key,
|
||||
f0_method,
|
||||
self.index,
|
||||
self.feature,
|
||||
index_rate,
|
||||
if_f0,
|
||||
silence_front=self.settings.extraConvertSize
|
||||
/ self.settings.modelSamplingRate,
|
||||
embChannels=embChannels,
|
||||
)
|
||||
|
||||
result = audio_out * np.sqrt(vol)
|
||||
|
||||
# if self.settings.framework == "ONNX":
|
||||
# audio = self._onnx_inference(data)
|
||||
# else:
|
||||
# audio = self._pyTorch_inference(data)
|
||||
|
||||
return result
|
||||
|
||||
def __del__(self):
|
||||
del self.net_g
|
||||
|
41
server/voice_changer/RVC/deviceManager/DeviceManager.py
Normal file
41
server/voice_changer/RVC/deviceManager/DeviceManager.py
Normal file
@ -0,0 +1,41 @@
|
||||
import torch
|
||||
|
||||
|
||||
class DeviceManager(object):
    """Process-wide singleton that selects the torch device (CUDA / MPS / CPU)
    for a requested GPU id and reports half-precision (fp16) capability."""

    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the shared DeviceManager, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        # Number of visible CUDA devices (0 on CPU-only / Apple machines).
        self.gpu_num = torch.cuda.device_count()
        # Apple Metal (MPS) availability; getattr-guarded because older
        # torch builds predate torch.backends.mps entirely.
        self.mps_enabled: bool = (
            getattr(torch.backends, "mps", None) is not None
            and torch.backends.mps.is_available()
        )

    def getDevice(self, id: int):
        """Map a GPU id to a torch.device.

        id < 0 explicitly requests CPU; CPU is also the fallback when no
        CUDA device exists and MPS is unavailable. When MPS is enabled it
        takes precedence over CUDA (the id is then ignored).
        """
        if id < 0 or (self.gpu_num == 0 and self.mps_enabled is False):
            dev = torch.device("cpu")
        elif self.mps_enabled:
            dev = torch.device("mps")
        else:
            dev = torch.device("cuda", index=id)
        return dev

    def halfPrecisionAvailable(self, id: int):
        """Return True when CUDA device `id` is expected to handle fp16 well.

        Blacklist adapted from the RVC-Project WebUI config: GTX 16-series
        (but not V100), P40, 1070 and 1080 cards have poor or absent fp16
        support.
        """
        if self.gpu_num == 0:
            # No CUDA device at all (CPU or MPS-only machine).
            return False
        gpuName = torch.cuda.get_device_name(id).upper()

        # original: https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/config.py
        if (
            ("16" in gpuName and "V100" not in gpuName)
            or "P40" in gpuName  # gpuName is already upper-cased above
            or "1070" in gpuName
            or "1080" in gpuName
        ):
            return False
        # Fix: the original fell through here and implicitly returned None
        # for capable GPUs; make the success path explicit.
        return True
|
@ -31,7 +31,6 @@ class Embedder(Protocol):
|
||||
self.file = file
|
||||
self.isHalf = isHalf
|
||||
self.dev = dev
|
||||
print("hubert initialize dev::::", self.dev, dev)
|
||||
|
||||
def setHalf(self, isHalf: bool):
|
||||
self.isHalf = isHalf
|
||||
|
Loading…
Reference in New Issue
Block a user