refactor pipeline

This commit is contained in:
wataru 2023-05-04 17:15:53 +09:00
parent e14e823fec
commit 617551db49
5 changed files with 150 additions and 94 deletions

View File

@ -13,6 +13,16 @@ class HalfPrecisionChangingException(Exception):
return repr("HalfPrecision related exception.")
class DeviceChangingException(Exception):
    """Signal that processing failed because the device is being changed.

    The RVC pipeline raises this when the embedder fails with a
    ``RuntimeError`` whose message mentions "same device".
    """

    def __str__(self):
        # repr() keeps the surrounding quotes, like the sibling exceptions.
        message = "Device changing..."
        return repr(message)
class NotEnoughDataExtimateF0(Exception):
    """Signal that there was too little audio to estimate f0 (pitch).

    The RVC pipeline raises this when pitch extraction fails with an
    ``IndexError``.
    """

    def __str__(self):
        # repr() keeps the surrounding quotes, like the sibling exceptions.
        message = "Not enough data to estimate f0."
        return repr(message)
class ONNXInputArgumentException(Exception):
    """Signal that ONNX received an invalid argument.

    NOTE(review): the raise sites are not visible in this module; the
    description follows the message text only.
    """

    def __str__(self):
        # repr() keeps the surrounding quotes, like the sibling exceptions.
        message = "ONNX received invalid argument."
        return repr(message)

View File

@ -1,63 +1,109 @@
import numpy as np
from typing import Any
# import parselmouth
import torch
import torch.nn.functional as F
from Exceptions import HalfPrecisionChangingException
from Exceptions import (
DeviceChangingException,
HalfPrecisionChangingException,
NotEnoughDataExtimateF0,
)
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
class VC(object):
def __init__(self, tgt_sr, device, is_half, x_pad):
self.sr = 16000 # hubert输入采样率
self.window = 160 # 每帧点数
self.t_pad = self.sr * x_pad # 每条前后pad时间
self.t_pad_tgt = tgt_sr * x_pad
self.device = device
self.is_half = is_half
class Pipeline(object):
embedder: Embedder
inferencer: Inferencer
pitchExtractor: PitchExtractor
def pipeline(
index: Any | None
feature: Any | None
targetSR: int
device: torch.device
isHalf: bool
def __init__(
    self,
    embedder: Embedder,
    inferencer: Inferencer,
    pitchExtractor: PitchExtractor,
    index: Any | None,
    feature: Any | None,
    targetSR: int,
    device: torch.device,
    isHalf: bool,
):
    """Bundle the components and settings used by one conversion pipeline.

    Args:
        embedder: Feature extractor; ``exec`` calls ``extractFeatures`` on it.
        inferencer: Model wrapper; ``exec`` calls ``infer`` on it.
        pitchExtractor: Pitch (f0) extractor; ``exec`` calls ``extract`` on it.
        index: Optional search index; ``exec`` calls ``search`` on it when
            both ``index`` and ``feature`` are set.
        feature: Optional feature array looked up with the indices returned
            by ``index.search``.
        targetSR: Sampling rate of the model output, used to size the
            target-side padding.
        device: Device on which the tensors built by ``exec`` are created.
        isHalf: When True, ``exec`` converts the input features to half
            precision instead of float.
    """
    self.embedder = embedder
    self.inferencer = inferencer
    self.pitchExtractor = pitchExtractor

    self.index = index
    self.feature = feature

    self.targetSR = targetSR
    self.device = device
    self.isHalf = isHalf

    # Input-side sample rate used for padding and passed to the pitch
    # extractor.
    self.sr = 16000
    # Samples per frame: padded length // window gives the frame count.
    self.window = 160
def setDevice(self, device: torch.device):
    """Switch the pipeline and its sub-components to ``device``.

    The pipeline's own ``device`` attribute is updated first, then the
    embedder and the inferencer are told to move, in that order.
    """
    self.device = device
    for component in (self.embedder, self.inferencer):
        component.setDevice(device)
def setPitchExtractor(self, pitchExtractor: PitchExtractor):
    """Replace the pitch extractor used by later ``exec`` calls.

    Only the stored reference is swapped; nothing else on the pipeline
    is touched.
    """
    self.pitchExtractor = pitchExtractor
def exec(
self,
sid,
audio,
f0_up_key,
index,
big_npy,
index_rate,
if_f0,
silence_front=0,
embChannels=256,
silence_front,
embChannels,
repeat,
):
self.t_pad = self.sr * repeat
self.t_pad_tgt = self.targetSR * repeat
audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
p_len = audio_pad.shape[0] // self.window
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
# ピッチ検出
pitch, pitchf = None, None
if if_f0 == 1:
pitch, pitchf = pitchExtractor.extract(
audio_pad,
f0_up_key,
self.sr,
self.window,
silence_front=silence_front,
)
pitch = pitch[:p_len]
pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(
pitchf, device=self.device, dtype=torch.float
).unsqueeze(0)
try:
if if_f0 == 1:
pitch, pitchf = self.pitchExtractor.extract(
audio_pad,
f0_up_key,
self.sr,
self.window,
silence_front=silence_front,
)
pitch = pitch[:p_len]
pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(
pitchf, device=self.device, dtype=torch.float
).unsqueeze(0)
except IndexError as e:
print(e)
raise NotEnoughDataExtimateF0()
# tensor型調整
feats = torch.from_numpy(audio_pad)
if self.is_half is True:
if self.isHalf is True:
feats = feats.half()
else:
feats = feats.float()
@ -69,25 +115,23 @@ class VC(object):
# embedding
padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
try:
feats = embedder.extractFeatures(feats, embChannels)
feats = self.embedder.extractFeatures(feats, embChannels)
except RuntimeError as e:
if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException()
elif "same device" in e.__str__():
raise DeviceChangingException()
else:
raise e
# Index - feature抽出
if (
isinstance(index, type(None)) is False
and isinstance(big_npy, type(None)) is False
and index_rate != 0
):
if self.index is not None and self.feature is not None and index_rate != 0:
npy = feats[0].cpu().numpy()
if self.is_half is True:
if self.isHalf is True:
npy = npy.astype("float32")
D, I = index.search(npy, 1)
npy = big_npy[I.squeeze()]
if self.is_half is True:
D, I = self.index.search(npy, 1)
npy = self.feature[I.squeeze()]
if self.isHalf is True:
npy = npy.astype("float16")
feats = (
@ -110,7 +154,7 @@ class VC(object):
with torch.no_grad():
audio1 = (
(
inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
self.inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
* 32768
)
.data.cpu()

View File

@ -4,7 +4,6 @@ from Exceptions import NoModeLoadedException
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
# avoiding parse arg error in RVC
@ -25,9 +24,7 @@ from voice_changer.RVC.modelMerger.MergeModel import merge_model
from voice_changer.RVC.modelMerger.MergeModelRequest import MergeModelRequest
from voice_changer.RVC.ModelSlotGenerator import generateModelSlot
from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
from voice_changer.utils.LoadModelParams import FilePaths, LoadModelParams
from voice_changer.utils.VoiceChangerModel import AudioInOut
@ -47,7 +44,7 @@ import faiss
from const import UPLOAD_DIR, EnumEmbedderTypes
from voice_changer.RVC.Pipeline import VC
from voice_changer.RVC.Pipeline import Pipeline
providers = [
"OpenVINOExecutionProvider",
@ -61,10 +58,8 @@ class RVC:
initialLoad: bool = True
settings: RVCSettings = RVCSettings()
embedder: Embedder | None = None
inferencer: Inferencer | None = None
pipeline: Pipeline | None = None
pitchExtractor: PitchExtractor | None = None
deviceManager = DeviceManager.get_instance()
audio_buffer: AudioInOut | None = None
@ -149,7 +144,26 @@ class RVC:
print("[Voice Changer] exception! loading embedder", e)
traceback.print_exc()
return inferencer, embedder
# pitchExtractor
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector
)
# index, feature
index, feature = self.loadIndex(modelSlot)
pipeline = Pipeline(
embedder,
inferencer,
pitchExtractor,
index,
feature,
modelSlot.samplingRate,
dev,
half,
)
return pipeline
def loadIndex(self, modelSlot: ModelSlot):
# Indexのロード
@ -187,16 +201,8 @@ class RVC:
print("[Voice Changer] Prepare Model of slot:", slot)
# Inferencer, embedderのロード
inferencer, embedder = self.createPipeline(modelSlot)
self.next_inferencer = inferencer
self.next_embedder = embedder
# Indexのロード
index, feature = self.loadIndex(modelSlot)
self.next_index = index
self.next_feature = feature
# pipelineの生成
self.next_pipeline = self.createPipeline(modelSlot)
# その他の設定
self.next_trans = modelSlot.defaultTrans
@ -208,10 +214,7 @@ class RVC:
def switchModel(self):
print("[Voice Changer] Switching model..")
self.embedder = self.next_embedder
self.inferencer = self.next_inferencer
self.feature = self.next_feature
self.index = self.next_index
self.pipeline = self.next_pipeline
self.settings.tran = self.next_trans
self.settings.modelSamplingRate = self.next_samplingRate
self.settings.framework = self.next_framework
@ -229,27 +232,21 @@ class RVC:
return True
val = val % 1000 # Quick hack for same slot is selected
self.prepareModel(val)
self.needSwitch = True
# 設定
setattr(self.settings, key, val)
if key == "gpu" and self.embedder is not None:
if key == "gpu":
dev = self.deviceManager.getDevice(val)
half = self.deviceManager.halfPrecisionAvailable(val)
# half-precisionの使用可否が変わるときは作り直し
if (
self.inferencer is not None
and self.inferencer.isHalf == half
and self.embedder.isHalf == half
):
if self.pipeline is not None and self.pipeline.isHalf == half:
print(
"USE EXSISTING PIPELINE",
half,
)
self.embedder.setDevice(dev)
self.inferencer.setDevice(dev)
self.pipeline.setDevice(dev)
else:
print("CHAGE TO NEW PIPELINE", half)
self.prepareModel(self.settings.modelSlotIndex)
@ -257,10 +254,11 @@ class RVC:
setattr(self.settings, key, float(val))
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
if key == "f0Detector":
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(
if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector
)
self.pipeline.setPitchExtractor(pitchExtractor)
else:
return False
return True
@ -323,7 +321,6 @@ class RVC:
self.switchModel()
self.needSwitch = False
dev = self.deviceManager.getDevice(self.settings.gpu)
half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu)
audio = data[0]
@ -337,7 +334,6 @@ class RVC:
repeat = 3 if half else 1
repeat *= self.settings.rvcQuality # 0 or 3
vc = VC(self.settings.modelSamplingRate, dev, half, repeat)
sid = 0
f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio
@ -345,20 +341,15 @@ class RVC:
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
audio_out = vc.pipeline(
self.embedder,
self.inferencer,
self.pitchExtractor,
audio_out = self.pipeline.exec(
sid,
audio,
f0_up_key,
self.index,
self.feature,
index_rate,
if_f0,
silence_front=self.settings.extraConvertSize
/ self.settings.modelSamplingRate,
embChannels=embChannels,
self.settings.extraConvertSize / self.settings.modelSamplingRate,
embChannels,
repeat,
)
result = audio_out * np.sqrt(vol)
@ -366,8 +357,7 @@ class RVC:
return result
def __del__(self):
del self.inferencer
del self.embedder
del self.pipeline
print("---------- REMOVING ---------------")

View File

@ -32,13 +32,17 @@ class DeviceManager(object):
if id < 0:
return False
gpuName = torch.cuda.get_device_name(id).upper()
if (
("16" in gpuName and "V100" not in gpuName)
or "P40" in gpuName.upper()
or "1070" in gpuName
or "1080" in gpuName
):
try:
gpuName = torch.cuda.get_device_name(id).upper()
if (
("16" in gpuName and "V100" not in gpuName)
or "P40" in gpuName.upper()
or "1070" in gpuName
or "1080" in gpuName
):
return False
except Exception as e:
print(e)
return False
return True

View File

@ -14,8 +14,10 @@ from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
from Exceptions import (
DeviceChangingException,
HalfPrecisionChangingException,
NoModeLoadedException,
NotEnoughDataExtimateF0,
ONNXInputArgumentException,
)
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
@ -348,6 +350,12 @@ class VoiceChanger:
except HalfPrecisionChangingException as e:
print("[Voice Changer] Switching model configuration....", e)
return np.zeros(1).astype(np.int16), [0, 0, 0]
except NotEnoughDataExtimateF0 as e:
print("[Voice Changer] not enough data", e)
return np.zeros(1).astype(np.int16), [0, 0, 0]
except DeviceChangingException as e:
print("[Voice Changer] embedder:", e)
return np.zeros(1).astype(np.int16), [0, 0, 0]
except Exception as e:
print("VC PROCESSING!!!! EXCEPTION!!!", e)
print(traceback.format_exc())