WIP: DADAPAN

This commit is contained in:
wataru 2023-05-24 16:56:23 +09:00
parent aa42c3f619
commit 55ea032a86
6 changed files with 71 additions and 19 deletions

View File

@ -17,6 +17,8 @@ class ModelSlot:
samplingRate: int = -1
f0: bool = True
embChannels: int = 256
embOutputLayter: int = 9
useFinalProj: bool = True
deprecated: bool = False
embedder: EnumEmbedderTypes = EnumEmbedderTypes.hubert

View File

@ -54,7 +54,9 @@ def generateModelSlot(slotDir: str):
def _setInfoByPytorch(slot: ModelSlot):
cpt = torch.load(slot.modelFile, map_location="cpu")
config_len = len(cpt["config"])
if config_len == 18:
# Original RVC
slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1")
if version is None or version == "v1":
@ -64,6 +66,8 @@ def _setInfoByPytorch(slot: ModelSlot):
else EnumInferenceTypes.pyTorchRVCNono
)
slot.embChannels = 256
slot.embOutputLayter = 9
slot.useFinalProj = True
slot.embedder = EnumEmbedderTypes.hubert
else:
slot.modelType = (
@ -72,9 +76,12 @@ def _setInfoByPytorch(slot: ModelSlot):
else EnumInferenceTypes.pyTorchRVCv2Nono
)
slot.embChannels = 768
slot.embOutputLayter = 12
slot.useFinalProj = False
slot.embedder = EnumEmbedderTypes.hubert
else:
# DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = (
EnumInferenceTypes.pyTorchWebUI
@ -82,6 +89,32 @@ def _setInfoByPytorch(slot: ModelSlot):
else EnumInferenceTypes.pyTorchWebUINono
)
slot.embChannels = cpt["config"][17]
slot.embOutputLayter = (
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
)
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Print a summary of the DDPN model's embedder configuration
if (
slot.embChannels == 256
and slot.embOutputLayter == 9
and slot.useFinalProj is True
):
print("[Voice Changer] DDPN Model: Original v1 like")
elif (
slot.embChannels == 768
and slot.embOutputLayter == 12
and slot.useFinalProj is False
):
print("[Voice Changer] DDPN Model: Original v2 like")
else:
print(
f"[Voice Changer] DDPN Model: ch:{slot.embChannels}, L:{slot.embOutputLayter}, FP:{slot.useFinalProj}"
)
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
slot.embedder = slot.embedder[:-3]
@ -111,6 +144,19 @@ def _setInfoByONNX(slot: ModelSlot):
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
slot.embOutputLayter = (
metadata["embedder_output_layer"]
if "embedder_output_layer" in metadata
else 9
)
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
print("ONNX", slot)
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert
elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:

View File

@ -333,8 +333,8 @@ class RVC:
f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayter
useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
audio_out = self.pipeline.exec(
sid,
@ -343,7 +343,8 @@ class RVC:
index_rate,
if_f0,
self.settings.extraConvertSize / self.settings.modelSamplingRate,
embChannels,
embOutputLayer,
useFinalProj,
repeat,
)

View File

@ -17,7 +17,9 @@ class Embedder(Protocol):
# Protocol stub: the body is only `...`; concrete embedder classes provide the real loader.
# NOTE(review): presumably loads the model stored at `file` onto device `dev`, using half
# precision when `isHalf` is true — confirm against the concrete implementations.
def loadModel(self, file: str, dev: device, isHalf: bool = True):
...
def extractFeatures(self, feats: torch.Tensor, embChannels=256) -> torch.Tensor:
def extractFeatures(
self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
) -> torch.Tensor:
...
def setProps(

View File

@ -23,23 +23,23 @@ class FairseqHubert(Embedder):
self.model = model
return self
def extractFeatures(self, feats: torch.Tensor, embChannels=256) -> torch.Tensor:
def extractFeatures(
self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
) -> torch.Tensor:
padding_mask = torch.BoolTensor(feats.shape).to(self.dev).fill_(False)
if embChannels == 256:
inputs = {
"source": feats.to(self.dev),
"padding_mask": padding_mask,
"output_layer": 9, # layer 9
}
else:
inputs = {
"source": feats.to(self.dev),
"padding_mask": padding_mask,
}
# Original v1 applied final_proj to the layer-9 (L9) output. (-> 256)
# Original v2 does not apply final_proj to the layer-12 (L12) output. (-> 768)
inputs = {
"source": feats.to(self.dev),
"padding_mask": padding_mask,
"output_layer": embOutputLayer, # 9 or 12
}
with torch.no_grad():
logits = self.model.extract_features(**inputs)
if embChannels == 256:
if useFinalProj:
feats = self.model.final_proj(logits[0])
else:
feats = logits[0]

View File

@ -82,7 +82,8 @@ class Pipeline(object):
index_rate,
if_f0,
silence_front,
embChannels,
embOutputLayer,
useFinalProj,
repeat,
):
self.t_pad = self.sr * repeat
@ -127,7 +128,7 @@ class Pipeline(object):
# embedding
padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
try:
feats = self.embedder.extractFeatures(feats, embChannels)
feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
except RuntimeError as e:
if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException()