mirror of https://github.com/w-okada/voice-changer.git

WIP: DADAPAN

commit 55ea032a86, parent aa42c3f619
@@ -17,6 +17,8 @@ class ModelSlot:
     samplingRate: int = -1
     f0: bool = True
     embChannels: int = 256
+    embOutputLayter: int = 9
+    useFinalProj: bool = True
     deprecated: bool = False
     embedder: EnumEmbedderTypes = EnumEmbedderTypes.hubert
 
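For reference, a minimal runnable sketch of the two fields this hunk adds and their defaults. The dataclass below is a hypothetical stand-in, not the project's ModelSlot; only the fields touched here are reproduced.

# Sketch only: a stand-in dataclass reproducing just the fields touched here.
from dataclasses import dataclass

@dataclass
class ModelSlotSketch:  # hypothetical stand-in for ModelSlot
    samplingRate: int = -1
    f0: bool = True
    embChannels: int = 256
    embOutputLayter: int = 9   # embedder output layer (spelling as in the repo)
    useFinalProj: bool = True  # whether final_proj is applied to the features
    deprecated: bool = False

slot = ModelSlotSketch()
print(slot.embOutputLayter, slot.useFinalProj)  # 9 True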
@@ -54,7 +54,9 @@ def generateModelSlot(slotDir: str):
 def _setInfoByPytorch(slot: ModelSlot):
     cpt = torch.load(slot.modelFile, map_location="cpu")
     config_len = len(cpt["config"])
+
     if config_len == 18:
+        # Original RVC
         slot.f0 = True if cpt["f0"] == 1 else False
         version = cpt.get("version", "v1")
         if version is None or version == "v1":
@@ -64,6 +66,8 @@ def _setInfoByPytorch(slot: ModelSlot):
                 else EnumInferenceTypes.pyTorchRVCNono
             )
             slot.embChannels = 256
+            slot.embOutputLayter = 9
+            slot.useFinalProj = True
             slot.embedder = EnumEmbedderTypes.hubert
         else:
             slot.modelType = (
@@ -72,9 +76,12 @@ def _setInfoByPytorch(slot: ModelSlot):
                 else EnumInferenceTypes.pyTorchRVCv2Nono
             )
             slot.embChannels = 768
+            slot.embOutputLayter = 12
+            slot.useFinalProj = False
             slot.embedder = EnumEmbedderTypes.hubert
 
     else:
+        # DDPN RVC
        slot.f0 = True if cpt["f0"] == 1 else False
        slot.modelType = (
            EnumInferenceTypes.pyTorchWebUI
@@ -82,6 +89,32 @@ def _setInfoByPytorch(slot: ModelSlot):
             else EnumInferenceTypes.pyTorchWebUINono
         )
         slot.embChannels = cpt["config"][17]
+        slot.embOutputLayter = (
+            cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
+        )
+        if slot.embChannels == 256:
+            slot.useFinalProj = True
+        else:
+            slot.useFinalProj = False
+
+        # Display the DDPN model info
+        if (
+            slot.embChannels == 256
+            and slot.embOutputLayter == 9
+            and slot.useFinalProj is True
+        ):
+            print("[Voice Changer] DDPN Model: Original v1 like")
+        elif (
+            slot.embChannels == 768
+            and slot.embOutputLayter == 12
+            and slot.useFinalProj is False
+        ):
+            print("[Voice Changer] DDPN Model: Original v2 like")
+        else:
+            print(
+                f"[Voice Changer] DDPN Model: ch:{slot.embChannels}, L:{slot.embOutputLayter}, FP:{slot.useFinalProj}"
+            )
+
         slot.embedder = cpt["embedder_name"]
         if slot.embedder.endswith("768"):
             slot.embedder = slot.embedder[:-3]
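A standalone sketch of the detection logic added in this hunk, with a plain dict standing in for the loaded checkpoint (torch.load is skipped; the dict keys follow the diff, while describe_ddpn and cpt_stub are hypothetical names).

# Sketch only: infer embedder output layer and final_proj usage from a
# checkpoint-like dict, mirroring the DDPN branch added in this commit.
def describe_ddpn(cpt: dict) -> str:
    emb_channels = cpt["config"][17]
    emb_output_layer = cpt.get("embedder_output_layer", 9)
    use_final_proj = emb_channels == 256
    if emb_channels == 256 and emb_output_layer == 9 and use_final_proj:
        return "Original v1 like"
    if emb_channels == 768 and emb_output_layer == 12 and not use_final_proj:
        return "Original v2 like"
    return f"ch:{emb_channels}, L:{emb_output_layer}, FP:{use_final_proj}"

# Hypothetical checkpoint stub: a 19-entry config with emb channels at index 17.
cpt_stub = {"config": [0] * 17 + [768, 0], "embedder_output_layer": 12}
print(describe_ddpn(cpt_stub))  # Original v2 like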
@@ -111,6 +144,19 @@ def _setInfoByONNX(slot: ModelSlot):
     # slot.modelType = metadata["modelType"]
     slot.embChannels = metadata["embChannels"]
+
+    slot.embOutputLayter = (
+        metadata["embedder_output_layer"]
+        if "embedder_output_layer" in metadata
+        else 9
+    )
+
+    if slot.embChannels == 256:
+        slot.useFinalProj = True
+    else:
+        slot.useFinalProj = False
+
+    print("ONNX", slot)
 
     if "embedder" not in metadata:
         slot.embedder = EnumEmbedderTypes.hubert
     elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
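The ONNX path applies the same fallback; a minimal sketch with a hypothetical metadata stub (read_onnx_embedder_info is an illustrative name, not project code).

# Sketch: same default/fallback as above, applied to an ONNX metadata dict.
def read_onnx_embedder_info(metadata: dict):
    embChannels = metadata["embChannels"]
    embOutputLayter = metadata.get("embedder_output_layer", 9)
    useFinalProj = embChannels == 256
    return embChannels, embOutputLayter, useFinalProj

print(read_onnx_embedder_info({"embChannels": 256}))  # (256, 9, True)
print(read_onnx_embedder_info({"embChannels": 768, "embedder_output_layer": 12}))  # (768, 12, False)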
@@ -333,8 +333,8 @@ class RVC:
         f0_up_key = self.settings.tran
         index_rate = self.settings.indexRatio
         if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
-        embChannels = self.settings.modelSlots[self.currentSlot].embChannels
+        embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayter
+        useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
 
         audio_out = self.pipeline.exec(
             sid,
@@ -343,7 +343,8 @@ class RVC:
             index_rate,
             if_f0,
             self.settings.extraConvertSize / self.settings.modelSamplingRate,
-            embChannels,
+            embOutputLayer,
+            useFinalProj,
             repeat,
         )
 
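For reference, a sketch of the per-slot values RVC now reads before calling pipeline.exec; SimpleNamespace is used here as a stand-in for the real settings objects.

# Sketch: the slot attributes consumed by RVC after this change.
from types import SimpleNamespace

slot = SimpleNamespace(f0=True, embOutputLayter=12, useFinalProj=False)

if_f0 = 1 if slot.f0 else 0
embOutputLayer = slot.embOutputLayter  # attribute keeps the repo's spelling
useFinalProj = slot.useFinalProj
print(if_f0, embOutputLayer, useFinalProj)  # 1 12 False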
@@ -17,7 +17,9 @@ class Embedder(Protocol):
     def loadModel(self, file: str, dev: device, isHalf: bool = True):
         ...
 
-    def extractFeatures(self, feats: torch.Tensor, embChannels=256) -> torch.Tensor:
+    def extractFeatures(
+        self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
+    ) -> torch.Tensor:
         ...
 
     def setProps(
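A self-contained sketch of the updated Protocol signature; EmbedderSketch and FakeEmbedder are hypothetical names used only to show how a caller passes the new arguments (torch is assumed to be installed).

# Sketch of the new extractFeatures contract.
from typing import Protocol
import torch

class EmbedderSketch(Protocol):
    def extractFeatures(
        self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
    ) -> torch.Tensor:
        ...

class FakeEmbedder:
    def extractFeatures(
        self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
    ) -> torch.Tensor:
        # A real embedder would run HuBERT up to embOutputLayer here.
        return feats[..., :256] if useFinalProj else feats

emb: EmbedderSketch = FakeEmbedder()
out = emb.extractFeatures(torch.zeros(1, 10, 768), embOutputLayer=12, useFinalProj=False)
print(out.shape)  # torch.Size([1, 10, 768])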
@@ -23,23 +23,23 @@ class FairseqHubert(Embedder):
         self.model = model
         return self
 
-    def extractFeatures(self, feats: torch.Tensor, embChannels=256) -> torch.Tensor:
+    def extractFeatures(
+        self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
+    ) -> torch.Tensor:
         padding_mask = torch.BoolTensor(feats.shape).to(self.dev).fill_(False)
-        if embChannels == 256:
-            inputs = {
-                "source": feats.to(self.dev),
-                "padding_mask": padding_mask,
-                "output_layer": 9,  # layer 9
-            }
-        else:
-            inputs = {
-                "source": feats.to(self.dev),
-                "padding_mask": padding_mask,
-            }
+
+        # Original v1 applied final_proj at L9. (-> 256)
+        # Original v2 does not apply final_proj at L12. (-> 768)
+
+        inputs = {
+            "source": feats.to(self.dev),
+            "padding_mask": padding_mask,
+            "output_layer": embOutputLayer,  # 9 or 12
+        }
 
         with torch.no_grad():
             logits = self.model.extract_features(**inputs)
-            if embChannels == 256:
+            if useFinalProj:
                 feats = self.model.final_proj(logits[0])
             else:
                 feats = logits[0]
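The removed embChannels switch encoded two presets; expressed with the new parameters (values taken from the comments above), a brief sketch:

# Sketch: the two configurations the removed embChannels branch stood for.
V1_PRESET = {"embOutputLayer": 9, "useFinalProj": True}    # v1-style hubert -> 256-dim feats
V2_PRESET = {"embOutputLayer": 12, "useFinalProj": False}  # v2-style hubert -> 768-dim feats

def extract(embedder, feats, preset):
    # embedder is assumed to expose the extractFeatures signature shown above
    return embedder.extractFeatures(feats, **preset)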
@@ -82,7 +82,8 @@ class Pipeline(object):
         index_rate,
         if_f0,
         silence_front,
-        embChannels,
+        embOutputLayer,
+        useFinalProj,
         repeat,
     ):
         self.t_pad = self.sr * repeat
@@ -127,7 +128,7 @@ class Pipeline(object):
         # embedding
         padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
         try:
-            feats = self.embedder.extractFeatures(feats, embChannels)
+            feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
         except RuntimeError as e:
             if "HALF" in e.__str__().upper():
                 raise HalfPrecisionChangingException()
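Putting the pieces together, a simplified sketch of the call chain after this commit; DummyPipeline and DummyEmbedder are hypothetical stand-ins, not project classes.

# Sketch of the call chain: slot settings -> Pipeline.exec -> Embedder.
class DummyEmbedder:
    def extractFeatures(self, feats, embOutputLayer=9, useFinalProj=True):
        return f"layer={embOutputLayer}, final_proj={useFinalProj}"

class DummyPipeline:
    def __init__(self, embedder):
        self.embedder = embedder

    def exec(self, feats, embOutputLayer, useFinalProj):
        # Pipeline no longer receives embChannels; it forwards the two new knobs.
        return self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)

pipeline = DummyPipeline(DummyEmbedder())
print(pipeline.exec(feats=None, embOutputLayer=12, useFinalProj=False))
# layer=12, final_proj=False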