mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00
WIP: refactoring
This commit is contained in:
parent
308fd190f3
commit
8121c3a849
17
server/voice_changer/RVC/ModelSlot.py
Normal file
17
server/voice_changer/RVC/ModelSlot.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from voice_changer.RVC.const import RVC_MODEL_TYPE_RVC
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ModelSlot:
    """Description of the model registered in one RVC model slot.

    A slot stores the file paths supplied when a model is loaded together
    with the metadata that is later filled in from the model itself
    (model type, sampling rate, f0 flag, embedding channels, embedder).
    """

    # File paths. "" is the "nothing registered" default; None is also
    # allowed because the loader copies optional upload file names
    # (typed ``str | None``) straight into these fields and checks them
    # with ``is not None``.
    pyTorchModelFile: str | None = ""
    onnxModelFile: str | None = ""
    featureFile: str | None = ""
    indexFile: str | None = ""

    # Default transpose value stored with the slot when the model is loaded.
    defaultTrans: int = 0

    # One of the RVC_MODEL_TYPE_* constants from voice_changer.RVC.const.
    modelType: int = RVC_MODEL_TYPE_RVC

    # NOTE(review): -1 presumably means "not read from the model yet" - confirm.
    samplingRate: int = -1

    # Whether the model uses pitch (f0) information.
    f0: bool = True

    embChannels: int = 256
    deprecated: bool = False
    embedder: str = "hubert_base"  # "hubert_base", "contentvec", "distilhubert"
|
@ -4,10 +4,11 @@ import json
|
|||||||
import resampy
|
import resampy
|
||||||
from voice_changer.RVC.ModelWrapper import ModelWrapper
|
from voice_changer.RVC.ModelWrapper import ModelWrapper
|
||||||
from Exceptions import NoModeLoadedException
|
from Exceptions import NoModeLoadedException
|
||||||
|
from voice_changer.RVC.RVCSettings import RVCSettings
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
|
|
||||||
from dataclasses import dataclass, asdict, field
|
from dataclasses import asdict
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@ -44,62 +45,6 @@ providers = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ModelSlot:
|
|
||||||
pyTorchModelFile: str = ""
|
|
||||||
onnxModelFile: str = ""
|
|
||||||
featureFile: str = ""
|
|
||||||
indexFile: str = ""
|
|
||||||
defaultTrans: int = 0
|
|
||||||
modelType: int = RVC_MODEL_TYPE_RVC
|
|
||||||
samplingRate: int = -1
|
|
||||||
f0: bool = True
|
|
||||||
embChannels: int = 256
|
|
||||||
deprecated: bool = False
|
|
||||||
embedder: str = "hubert_base" # "hubert_base", "contentvec", "distilhubert"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class RVCSettings:
|
|
||||||
gpu: int = 0
|
|
||||||
dstId: int = 0
|
|
||||||
|
|
||||||
f0Detector: str = "pm" # pm or harvest
|
|
||||||
tran: int = 20
|
|
||||||
silentThreshold: float = 0.00001
|
|
||||||
extraConvertSize: int = 1024 * 32
|
|
||||||
clusterInferRatio: float = 0.1
|
|
||||||
|
|
||||||
framework: str = "PyTorch" # PyTorch or ONNX
|
|
||||||
pyTorchModelFile: str = ""
|
|
||||||
onnxModelFile: str = ""
|
|
||||||
configFile: str = ""
|
|
||||||
modelSlots: list[ModelSlot] = field(
|
|
||||||
default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot()]
|
|
||||||
)
|
|
||||||
indexRatio: float = 0
|
|
||||||
rvcQuality: int = 0
|
|
||||||
silenceFront: int = 1 # 0:off, 1:on
|
|
||||||
modelSamplingRate: int = 48000
|
|
||||||
modelSlotIndex: int = -1
|
|
||||||
|
|
||||||
speakers: dict[str, int] = field(default_factory=lambda: {})
|
|
||||||
|
|
||||||
# ↓mutableな物だけ列挙
|
|
||||||
intData = [
|
|
||||||
"gpu",
|
|
||||||
"dstId",
|
|
||||||
"tran",
|
|
||||||
"extraConvertSize",
|
|
||||||
"rvcQuality",
|
|
||||||
"modelSamplingRate",
|
|
||||||
"silenceFront",
|
|
||||||
"modelSlotIndex",
|
|
||||||
]
|
|
||||||
floatData = ["silentThreshold", "indexRatio"]
|
|
||||||
strData = ["framework", "f0Detector"]
|
|
||||||
|
|
||||||
|
|
||||||
class RVC:
|
class RVC:
|
||||||
def __init__(self, params: VoiceChangerParams):
|
def __init__(self, params: VoiceChangerParams):
|
||||||
self.initialLoad = True
|
self.initialLoad = True
|
||||||
@ -123,41 +68,44 @@ class RVC:
|
|||||||
print("mps: ", self.mps_enabled)
|
print("mps: ", self.mps_enabled)
|
||||||
|
|
||||||
def loadModel(self, props: LoadModelParams):
|
def loadModel(self, props: LoadModelParams):
|
||||||
|
"""
|
||||||
|
loadModelはスロットへのエントリ(推論向けにはロードしない)。
|
||||||
|
例外的に、まだ一つも推論向けにロードされていない場合は、ロードする。
|
||||||
|
"""
|
||||||
self.is_half = props.isHalf
|
self.is_half = props.isHalf
|
||||||
tmp_slot = props.slot
|
tmp_slot = props.slot
|
||||||
params_str = props.params
|
params_str = props.params
|
||||||
params = json.loads(params_str)
|
params = json.loads(params_str)
|
||||||
|
|
||||||
newSlot = asdict(self.settings.modelSlots[tmp_slot])
|
self.settings.modelSlots[
|
||||||
newSlot.update(
|
tmp_slot
|
||||||
{
|
].pyTorchModelFile = props.files.pyTorchModelFilename
|
||||||
"pyTorchModelFile": props.files.pyTorchModelFilename,
|
self.settings.modelSlots[tmp_slot].onnxModelFile = props.files.onnxModelFilename
|
||||||
"onnxModelFile": props.files.onnxModelFilename,
|
self.settings.modelSlots[tmp_slot].featureFile = props.files.featureFilename
|
||||||
"featureFile": props.files.featureFilename,
|
self.settings.modelSlots[tmp_slot].indexFile = props.files.indexFilename
|
||||||
"indexFile": props.files.indexFilename,
|
self.settings.modelSlots[tmp_slot].defaultTrans = params["trans"]
|
||||||
"defaultTrans": params["trans"],
|
|
||||||
}
|
isONNX = (
|
||||||
|
True
|
||||||
|
if self.settings.modelSlots[tmp_slot].onnxModelFile is not None
|
||||||
|
else False
|
||||||
)
|
)
|
||||||
self.settings.modelSlots[tmp_slot] = ModelSlot(**newSlot)
|
|
||||||
|
|
||||||
print("[Voice Changer] RVC loading... slot:", tmp_slot)
|
# メタデータ設定
|
||||||
|
if isONNX:
|
||||||
# Load metadata
|
|
||||||
if (
|
|
||||||
self.settings.modelSlots[tmp_slot].pyTorchModelFile is not None
|
|
||||||
and self.settings.modelSlots[tmp_slot].pyTorchModelFile != ""
|
|
||||||
):
|
|
||||||
self._setInfoByPytorch(
|
|
||||||
tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile
|
|
||||||
)
|
|
||||||
if (
|
|
||||||
self.settings.modelSlots[tmp_slot].onnxModelFile is not None
|
|
||||||
and self.settings.modelSlots[tmp_slot].onnxModelFile != ""
|
|
||||||
):
|
|
||||||
self._setInfoByONNX(
|
self._setInfoByONNX(
|
||||||
tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile
|
tmp_slot, self.settings.modelSlots[tmp_slot].onnxModelFile
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
self._setInfoByPytorch(
|
||||||
|
tmp_slot, self.settings.modelSlots[tmp_slot].pyTorchModelFile
|
||||||
|
)
|
||||||
|
|
||||||
|
print(
|
||||||
|
f"[Voice Changer] RVC loading... slot:{tmp_slot}",
|
||||||
|
asdict(self.settings.modelSlots[tmp_slot]),
|
||||||
|
)
|
||||||
|
# hubertロード
|
||||||
try:
|
try:
|
||||||
hubert_path = self.params.hubert_base
|
hubert_path = self.params.hubert_base
|
||||||
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
|
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
|
||||||
@ -173,6 +121,7 @@ class RVC:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("EXCEPTION during loading hubert/contentvec model", e)
|
print("EXCEPTION during loading hubert/contentvec model", e)
|
||||||
|
|
||||||
|
# 初回のみロード
|
||||||
if self.initialLoad or tmp_slot == self.currentSlot:
|
if self.initialLoad or tmp_slot == self.currentSlot:
|
||||||
self.prepareModel(tmp_slot)
|
self.prepareModel(tmp_slot)
|
||||||
self.settings.modelSlotIndex = tmp_slot
|
self.settings.modelSlotIndex = tmp_slot
|
||||||
@ -197,7 +146,6 @@ class RVC:
|
|||||||
self.settings.modelSlots[slot].embedder = self.settings.modelSlots[
|
self.settings.modelSlots[slot].embedder = self.settings.modelSlots[
|
||||||
slot
|
slot
|
||||||
].embedder[:-3]
|
].embedder[:-3]
|
||||||
print("embedder....", self.settings.modelSlots[slot].embedder)
|
|
||||||
|
|
||||||
self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
|
self.settings.modelSlots[slot].f0 = True if cpt["f0"] == 1 else False
|
||||||
self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]
|
self.settings.modelSlots[slot].samplingRate = cpt["config"][-1]
|
||||||
@ -208,84 +156,55 @@ class RVC:
|
|||||||
tmp_onnx_session = ModelWrapper(file)
|
tmp_onnx_session = ModelWrapper(file)
|
||||||
self.settings.modelSlots[slot].modelType = tmp_onnx_session.getModelType()
|
self.settings.modelSlots[slot].modelType = tmp_onnx_session.getModelType()
|
||||||
self.settings.modelSlots[slot].embChannels = tmp_onnx_session.getEmbChannels()
|
self.settings.modelSlots[slot].embChannels = tmp_onnx_session.getEmbChannels()
|
||||||
|
self.settings.modelSlots[slot].embedder = tmp_onnx_session.getEmbedder()
|
||||||
self.settings.modelSlots[slot].f0 = tmp_onnx_session.getF0()
|
self.settings.modelSlots[slot].f0 = tmp_onnx_session.getF0()
|
||||||
self.settings.modelSlots[slot].samplingRate = tmp_onnx_session.getSamplingRate()
|
self.settings.modelSlots[slot].samplingRate = tmp_onnx_session.getSamplingRate()
|
||||||
self.settings.modelSlots[slot].deprecated = tmp_onnx_session.getDeprecated()
|
self.settings.modelSlots[slot].deprecated = tmp_onnx_session.getDeprecated()
|
||||||
self.settings.modelSlots[slot].embedder = tmp_onnx_session.getEmbedder()
|
|
||||||
print("embedder....", self.settings.modelSlots[slot].embedder)
|
|
||||||
|
|
||||||
def prepareModel(self, slot: int):
|
def prepareModel(self, slot: int):
|
||||||
print("[Voice Changer] Prepare Model of slot:", slot)
|
print("[Voice Changer] Prepare Model of slot:", slot)
|
||||||
pyTorchModelFile = self.settings.modelSlots[slot].pyTorchModelFile
|
|
||||||
onnxModelFile = self.settings.modelSlots[slot].onnxModelFile
|
onnxModelFile = self.settings.modelSlots[slot].onnxModelFile
|
||||||
# PyTorchモデル生成
|
isONNX = (
|
||||||
if pyTorchModelFile != None and pyTorchModelFile != "":
|
True if self.settings.modelSlots[slot].onnxModelFile is not None else False
|
||||||
|
)
|
||||||
|
|
||||||
|
if isONNX:
|
||||||
|
print("[Voice Changer] Loading ONNX Model...")
|
||||||
|
self.next_onnx_session = ModelWrapper(onnxModelFile)
|
||||||
|
self.next_net_g = None
|
||||||
|
else:
|
||||||
print("[Voice Changer] Loading Pytorch Model...")
|
print("[Voice Changer] Loading Pytorch Model...")
|
||||||
cpt = torch.load(pyTorchModelFile, map_location="cpu")
|
torchModelSlot = self.settings.modelSlots[slot]
|
||||||
"""
|
cpt = torch.load(torchModelSlot.pyTorchModelFile, map_location="cpu")
|
||||||
(1) オリジナルとrvc-webuiのモデル判定 ⇒ config全体の形状
|
|
||||||
■ ノーマル256
|
|
||||||
[1025, 32, 192, 192, 768, 2, 6, 3, 0, '1', [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 6, 2, 2, 2], 512, [16, 16, 4, 4, 4], 109, 256, 48000]
|
|
||||||
■ ノーマル 768対応
|
|
||||||
[1025, 32, 192, 192, 768, 2, 6, 3, 0, '1', [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 6, 2, 2, 2], 512, [16, 16, 4, 4, 4], 109, 256, 768, 48000]
|
|
||||||
⇒ 18: オリジナル, 19: rvc-webui
|
|
||||||
|
|
||||||
(2-1) オリジナルのノーマルorPitchレス判定 ⇒ ckp["f0"]で判定
|
|
||||||
0: ピッチレス, 1:ノーマル
|
|
||||||
|
|
||||||
(2-2) rvc-webuiの、(256 or 768) x (ノーマルor pitchレス)判定 ⇒ 256, or 768 は17番目の要素で判定。, ノーマルor pitchレスはckp["f0"]で判定
|
|
||||||
"""
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
|
torchModelSlot.modelType == RVC_MODEL_TYPE_RVC
|
||||||
and self.settings.modelSlots[slot].f0 is True
|
and torchModelSlot.f0 is True
|
||||||
):
|
):
|
||||||
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
|
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.is_half)
|
||||||
elif (
|
elif (
|
||||||
self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_RVC
|
torchModelSlot.modelType == RVC_MODEL_TYPE_RVC
|
||||||
and self.settings.modelSlots[slot].f0 is False
|
and torchModelSlot.f0 is False
|
||||||
):
|
):
|
||||||
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
||||||
elif (
|
elif (
|
||||||
self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
|
torchModelSlot.modelType == RVC_MODEL_TYPE_WEBUI
|
||||||
and self.settings.modelSlots[slot].f0 is True
|
and torchModelSlot.f0 is True
|
||||||
):
|
):
|
||||||
net_g = SynthesizerTrnMsNSFsid_webui(
|
net_g = SynthesizerTrnMsNSFsid_webui(
|
||||||
**cpt["params"], is_half=self.is_half
|
**cpt["params"], is_half=self.is_half
|
||||||
)
|
)
|
||||||
elif (
|
else:
|
||||||
self.settings.modelSlots[slot].modelType == RVC_MODEL_TYPE_WEBUI
|
|
||||||
and self.settings.modelSlots[slot].f0 is False
|
|
||||||
):
|
|
||||||
net_g = SynthesizerTrnMsNSFsidNono_webui(
|
net_g = SynthesizerTrnMsNSFsidNono_webui(
|
||||||
**cpt["params"], is_half=self.is_half
|
**cpt["params"], is_half=self.is_half
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
print("unknwon")
|
|
||||||
|
|
||||||
net_g.eval()
|
net_g.eval()
|
||||||
net_g.load_state_dict(cpt["weight"], strict=False)
|
net_g.load_state_dict(cpt["weight"], strict=False)
|
||||||
|
|
||||||
if self.is_half:
|
if self.is_half:
|
||||||
net_g = net_g.half()
|
net_g = net_g.half()
|
||||||
|
|
||||||
self.next_net_g = net_g
|
self.next_net_g = net_g
|
||||||
else:
|
|
||||||
print("[Voice Changer] Skip Loading Pytorch Model...")
|
|
||||||
self.next_net_g = None
|
|
||||||
|
|
||||||
# ONNXモデル生成
|
|
||||||
if onnxModelFile != None and onnxModelFile != "":
|
|
||||||
print("[Voice Changer] Loading ONNX Model...")
|
|
||||||
self.next_onnx_session = ModelWrapper(onnxModelFile)
|
|
||||||
# self.settings.modelSlots[slot].samplingRateOnnx = self.next_onnx_session.getSamplingRate()
|
|
||||||
# self.settings.modelSlots[slot].f0Onnx = self.next_onnx_session.getF0()
|
|
||||||
# self.settings.modelSlots[slot].embChannelsOnnx = self.next_onnx_session.getEmbChannels()
|
|
||||||
|
|
||||||
# # ONNXがある場合は、ONNXの設定を優先
|
|
||||||
# self.settings.modelSlots[slot].samplingRate = self.settings.modelSlots[slot].samplingRateOnnx
|
|
||||||
# self.settings.modelSlots[slot].f0 = self.settings.modelSlots[slot].f0Onnx
|
|
||||||
# self.settings.modelSlots[slot].embChannels = self.settings.modelSlots[slot].embChannelsOnnx
|
|
||||||
else:
|
|
||||||
print("[Voice Changer] Skip Loading ONNX Model...")
|
|
||||||
self.next_onnx_session = None
|
self.next_onnx_session = None
|
||||||
|
|
||||||
self.next_feature_file = self.settings.modelSlots[slot].featureFile
|
self.next_feature_file = self.settings.modelSlots[slot].featureFile
|
||||||
@ -295,15 +214,11 @@ class RVC:
|
|||||||
self.next_framework = (
|
self.next_framework = (
|
||||||
"ONNX" if self.next_onnx_session is not None else "PyTorch"
|
"ONNX" if self.next_onnx_session is not None else "PyTorch"
|
||||||
)
|
)
|
||||||
print(
|
print("[Voice Changer] Prepare done.")
|
||||||
"[Voice Changer] Prepare done.",
|
|
||||||
)
|
|
||||||
return self.get_info()
|
return self.get_info()
|
||||||
|
|
||||||
def switchModel(self):
|
def switchModel(self):
|
||||||
print(
|
print("[Voice Changer] Switching model..")
|
||||||
"[Voice Changer] Switching model..",
|
|
||||||
)
|
|
||||||
# del self.net_g
|
# del self.net_g
|
||||||
# del self.onnx_session
|
# del self.onnx_session
|
||||||
self.net_g = self.next_net_g
|
self.net_g = self.next_net_g
|
||||||
|
44
server/voice_changer/RVC/RVCSettings.py
Normal file
44
server/voice_changer/RVC/RVCSettings.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RVCSettings:
    """Settings container for the RVC voice changer.

    Holds the conversion parameters, the currently selected framework and
    model files, and the list of model slots (three empty slots by default).
    The class-level ``intData`` / ``floatData`` / ``strData`` lists are plain
    class attributes (not dataclass fields) that name settings by value type.
    """

    gpu: int = 0
    dstId: int = 0

    f0Detector: str = "pm"  # pm or harvest
    tran: int = 20
    silentThreshold: float = 0.00001
    extraConvertSize: int = 1024 * 32
    clusterInferRatio: float = 0.1

    framework: str = "PyTorch"  # PyTorch or ONNX
    pyTorchModelFile: str = ""
    onnxModelFile: str = ""
    configFile: str = ""
    # A fresh list of three independent, default-initialised slots per instance.
    modelSlots: list[ModelSlot] = field(
        default_factory=lambda: [ModelSlot() for _ in range(3)]
    )
    indexRatio: float = 0
    rvcQuality: int = 0
    silenceFront: int = 1  # 0:off, 1:on
    modelSamplingRate: int = 48000
    modelSlotIndex: int = -1

    speakers: dict[str, int] = field(default_factory=dict)

    # Only the mutable settings are listed below, grouped by value type.
    # NOTE(review): presumably consulted by the settings-update code to
    # decide how to cast incoming values - confirm against the caller.
    intData = [
        "gpu", "dstId", "tran", "extraConvertSize",
        "rvcQuality", "modelSamplingRate", "silenceFront", "modelSlotIndex",
    ]
    floatData = ["silentThreshold", "indexRatio"]
    strData = ["framework", "f0Detector"]
|
@ -3,12 +3,12 @@ from dataclasses import dataclass
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class FilePaths:
|
class FilePaths:
|
||||||
configFilename: str
|
configFilename: str | None
|
||||||
pyTorchModelFilename: str
|
pyTorchModelFilename: str | None
|
||||||
onnxModelFilename: str
|
onnxModelFilename: str | None
|
||||||
clusterTorchModelFilename: str
|
clusterTorchModelFilename: str | None
|
||||||
featureFilename: str
|
featureFilename: str | None
|
||||||
indexFilename: str
|
indexFilename: str | None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
Loading…
Reference in New Issue
Block a user