WIP: DADAPAN

wataru 2023-05-24 16:56:23 +09:00
parent aa42c3f619
commit 55ea032a86
6 changed files with 71 additions and 19 deletions

View File

@@ -17,6 +17,8 @@ class ModelSlot:
     samplingRate: int = -1
     f0: bool = True
     embChannels: int = 256
+    embOutputLayter: int = 9
+    useFinalProj: bool = True
     deprecated: bool = False
     embedder: EnumEmbedderTypes = EnumEmbedderTypes.hubert
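
The two new ModelSlot fields record which HuBERT layer the embeddings are taken from and whether final_proj is applied afterwards. As a rough illustration (a stand-in dataclass, not the repo's ModelSlot), the two original RVC variants map onto the fields like this:

from dataclasses import dataclass

@dataclass
class SlotSketch:
    # Stand-in for the real ModelSlot; only the fields touched by this commit.
    embChannels: int = 256
    embOutputLayter: int = 9   # field name keeps the repo's spelling
    useFinalProj: bool = True

# Expected values after _setInfoByPytorch classifies a checkpoint:
v1_like = SlotSketch(embChannels=256, embOutputLayter=9, useFinalProj=True)
v2_like = SlotSketch(embChannels=768, embOutputLayter=12, useFinalProj=False)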

View File

@@ -54,7 +54,9 @@ def generateModelSlot(slotDir: str):
 def _setInfoByPytorch(slot: ModelSlot):
     cpt = torch.load(slot.modelFile, map_location="cpu")
     config_len = len(cpt["config"])
     if config_len == 18:
+        # Original RVC
         slot.f0 = True if cpt["f0"] == 1 else False
         version = cpt.get("version", "v1")
         if version is None or version == "v1":
@@ -64,6 +66,8 @@ def _setInfoByPytorch(slot: ModelSlot):
                 else EnumInferenceTypes.pyTorchRVCNono
             )
             slot.embChannels = 256
+            slot.embOutputLayter = 9
+            slot.useFinalProj = True
             slot.embedder = EnumEmbedderTypes.hubert
         else:
             slot.modelType = (
@@ -72,9 +76,12 @@ def _setInfoByPytorch(slot: ModelSlot):
                 else EnumInferenceTypes.pyTorchRVCv2Nono
             )
             slot.embChannels = 768
+            slot.embOutputLayter = 12
+            slot.useFinalProj = False
             slot.embedder = EnumEmbedderTypes.hubert
     else:
+        # DDPN RVC
         slot.f0 = True if cpt["f0"] == 1 else False
         slot.modelType = (
             EnumInferenceTypes.pyTorchWebUI
@@ -82,6 +89,32 @@ def _setInfoByPytorch(slot: ModelSlot):
             else EnumInferenceTypes.pyTorchWebUINono
         )
         slot.embChannels = cpt["config"][17]
+        slot.embOutputLayter = (
+            cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
+        )
+        if slot.embChannels == 256:
+            slot.useFinalProj = True
+        else:
+            slot.useFinalProj = False
+        # Display the DDPN model info
+        if (
+            slot.embChannels == 256
+            and slot.embOutputLayter == 9
+            and slot.useFinalProj is True
+        ):
+            print("[Voice Changer] DDPN Model: Original v1 like")
+        elif (
+            slot.embChannels == 768
+            and slot.embOutputLayter == 12
+            and slot.useFinalProj is False
+        ):
+            print("[Voice Changer] DDPN Model: Original v2 like")
+        else:
+            print(
+                f"[Voice Changer] DDPN Model: ch:{slot.embChannels}, L:{slot.embOutputLayter}, FP:{slot.useFinalProj}"
+            )
         slot.embedder = cpt["embedder_name"]
         if slot.embedder.endswith("768"):
             slot.embedder = slot.embedder[:-3]
@@ -111,6 +144,19 @@ def _setInfoByONNX(slot: ModelSlot):
         # slot.modelType = metadata["modelType"]
         slot.embChannels = metadata["embChannels"]
+        slot.embOutputLayter = (
+            metadata["embedder_output_layer"]
+            if "embedder_output_layer" in metadata
+            else 9
+        )
+        if slot.embChannels == 256:
+            slot.useFinalProj = True
+        else:
+            slot.useFinalProj = False
+        print("ONNX", slot)
         if "embedder" not in metadata:
             slot.embedder = EnumEmbedderTypes.hubert
         elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:

View File

@@ -333,8 +333,8 @@ class RVC:
         f0_up_key = self.settings.tran
         index_rate = self.settings.indexRatio
         if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
-        embChannels = self.settings.modelSlots[self.currentSlot].embChannels
+        embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayter
+        useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
         audio_out = self.pipeline.exec(
             sid,
@@ -343,7 +343,8 @@ class RVC:
             index_rate,
             if_f0,
             self.settings.extraConvertSize / self.settings.modelSamplingRate,
-            embChannels,
+            embOutputLayer,
+            useFinalProj,
             repeat,
         )

View File

@@ -17,7 +17,9 @@ class Embedder(Protocol):
     def loadModel(self, file: str, dev: device, isHalf: bool = True):
         ...

-    def extractFeatures(self, feats: torch.Tensor, embChannels=256) -> torch.Tensor:
+    def extractFeatures(
+        self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
+    ) -> torch.Tensor:
         ...

     def setProps(
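
The Protocol now asks implementations to accept the output layer and the final_proj switch instead of a channel count. A self-contained sketch of just that part of the interface (simplified; the real Protocol also declares loadModel and setProps and uses the repo's device type):

from typing import Protocol
import torch

class EmbedderSketch(Protocol):
    def extractFeatures(
        self, feats: torch.Tensor, embOutputLayer: int = 9, useFinalProj: bool = True
    ) -> torch.Tensor:
        ...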

View File

@@ -23,23 +23,23 @@ class FairseqHubert(Embedder):
         self.model = model
         return self

-    def extractFeatures(self, feats: torch.Tensor, embChannels=256) -> torch.Tensor:
+    def extractFeatures(
+        self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
+    ) -> torch.Tensor:
         padding_mask = torch.BoolTensor(feats.shape).to(self.dev).fill_(False)
-        if embChannels == 256:
-            inputs = {
-                "source": feats.to(self.dev),
-                "padding_mask": padding_mask,
-                "output_layer": 9,  # layer 9
-            }
-        else:
-            inputs = {
-                "source": feats.to(self.dev),
-                "padding_mask": padding_mask,
-            }
+
+        # Original v1 applied final_proj to the layer 9 output. (-> 256)
+        # Original v2 does not apply final_proj to the layer 12 output. (-> 768)
+        inputs = {
+            "source": feats.to(self.dev),
+            "padding_mask": padding_mask,
+            "output_layer": embOutputLayer,  # 9 or 12
+        }

         with torch.no_grad():
             logits = self.model.extract_features(**inputs)
-            if embChannels == 256:
+            if useFinalProj:
                 feats = self.model.final_proj(logits[0])
             else:
                 feats = logits[0]
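
With the branching removed, the caller now decides both the HuBERT layer and whether final_proj is applied. A toy stand-in (not the real FairseqHubert, which wraps a loaded fairseq model) that mimics the new signature and shows the two expected output widths:

import torch

class MockHubert:
    def __init__(self):
        self.final_proj = torch.nn.Linear(768, 256)

    def extractFeatures(self, feats, embOutputLayer=9, useFinalProj=True):
        # The real code requests the chosen layer via
        # extract_features(..., output_layer=embOutputLayer); here we fake
        # a 768-dim hidden state with one frame per 320 input samples.
        hidden = torch.randn(feats.shape[0], feats.shape[1] // 320, 768)
        return self.final_proj(hidden) if useFinalProj else hidden

wav = torch.zeros(1, 16000)
print(MockHubert().extractFeatures(wav, 9, True).shape)    # v1-like -> 256 channels
print(MockHubert().extractFeatures(wav, 12, False).shape)  # v2-like -> 768 channels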

View File

@@ -82,7 +82,8 @@ class Pipeline(object):
         index_rate,
         if_f0,
         silence_front,
-        embChannels,
+        embOutputLayer,
+        useFinalProj,
         repeat,
     ):
         self.t_pad = self.sr * repeat
@@ -127,7 +128,7 @@ class Pipeline(object):
         # embedding
         padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
         try:
-            feats = self.embedder.extractFeatures(feats, embChannels)
+            feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
         except RuntimeError as e:
             if "HALF" in e.__str__().upper():
                 raise HalfPrecisionChangingException()
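
Pipeline.exec itself only threads the two values through to the embedder. A minimal sketch of that hand-off with generic names (the surrounding pipeline state and HalfPrecisionChangingException are omitted); it accepts any object exposing the extractFeatures method from this diff, such as the MockHubert sketch above:

import torch

def embed(embedder, feats: torch.Tensor, embOutputLayer: int, useFinalProj: bool) -> torch.Tensor:
    try:
        return embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
    except RuntimeError as e:
        # The real pipeline converts half-precision failures into its own
        # exception type; re-raise generically in this sketch.
        if "HALF" in str(e).upper():
            raise RuntimeError("half-precision mismatch") from e
        raise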