From caa8c99f14a06e0d3a213925bc968287dfafc7d4 Mon Sep 17 00:00:00 2001 From: w-okada Date: Sat, 8 Jul 2023 03:18:23 +0900 Subject: [PATCH] New Feature: - contentvec onnx --- server/downloader/SampleDownloader.py | 2 +- server/downloader/WeightDownloader.py | 11 +++++++- server/voice_changer/RVC/embedder/Embedder.py | 15 +++++----- .../RVC/embedder/EmbedderManager.py | 28 +++++++++++++------ .../RVC/embedder/EmbedderProtocol.py | 15 ++++++++++ .../RVC/embedder/OnnxContentvec.py | 13 +++++++++ server/voice_changer/RVC/pipeline/Pipeline.py | 3 +- 7 files changed, 68 insertions(+), 19 deletions(-) create mode 100644 server/voice_changer/RVC/embedder/EmbedderProtocol.py create mode 100644 server/voice_changer/RVC/embedder/OnnxContentvec.py diff --git a/server/downloader/SampleDownloader.py b/server/downloader/SampleDownloader.py index 3ecb8345..c0d83ea5 100644 --- a/server/downloader/SampleDownloader.py +++ b/server/downloader/SampleDownloader.py @@ -169,5 +169,5 @@ def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str slotInfo = RVCModelSlotGenerator._setInfoByONNX(slotInfo) else: slotInfo = RVCModelSlotGenerator._setInfoByPytorch(slotInfo) - + modelSlotManager.save_model_slot(targetSlotIndex, slotInfo) diff --git a/server/downloader/WeightDownloader.py b/server/downloader/WeightDownloader.py index d7c19b69..153dcb6d 100644 --- a/server/downloader/WeightDownloader.py +++ b/server/downloader/WeightDownloader.py @@ -7,6 +7,7 @@ from Exceptions import WeightDownladException def downloadWeight(voiceChangerParams: VoiceChangerParams): + content_vec_500_onnx = voiceChangerParams.content_vec_500_onnx hubert_base = voiceChangerParams.hubert_base hubert_base_jp = voiceChangerParams.hubert_base_jp hubert_soft = voiceChangerParams.hubert_soft @@ -67,7 +68,6 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams): "position": 5, } ) - if os.path.exists(crepe_onnx_tiny) is False: downloadParams.append( { @@ -77,6 +77,15 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams): } ) + if os.path.exists(content_vec_500_onnx) is False: + downloadParams.append( + { + "url": "https://huggingface.co/wok000/weights_gpl/resolve/main/content-vec/contentvec-f.onnx", + "saveTo": content_vec_500_onnx, + "position": 7, + } + ) + with ThreadPoolExecutor() as pool: pool.map(download, downloadParams) diff --git a/server/voice_changer/RVC/embedder/Embedder.py b/server/voice_changer/RVC/embedder/Embedder.py index e9ec4214..628f2302 100644 --- a/server/voice_changer/RVC/embedder/Embedder.py +++ b/server/voice_changer/RVC/embedder/Embedder.py @@ -1,18 +1,19 @@ -from typing import Any, Protocol +from typing import Any import torch from torch import device from const import EmbedderType +from voice_changer.RVC.embedder.EmbedderProtocol import EmbedderProtocol -class Embedder(Protocol): - embedderType: EmbedderType = "hubert_base" - file: str - isHalf: bool = True - dev: device +class Embedder(EmbedderProtocol): + def __init__(self): + self.embedderType: EmbedderType = "hubert_base" + self.file: str + self.dev: device - model: Any | None = None + self.model: Any | None = None def loadModel(self, file: str, dev: device, isHalf: bool = True): ... diff --git a/server/voice_changer/RVC/embedder/EmbedderManager.py b/server/voice_changer/RVC/embedder/EmbedderManager.py index ac92ba3f..42e011ad 100644 --- a/server/voice_changer/RVC/embedder/EmbedderManager.py +++ b/server/voice_changer/RVC/embedder/EmbedderManager.py @@ -5,6 +5,7 @@ from voice_changer.RVC.embedder.Embedder import Embedder from voice_changer.RVC.embedder.FairseqContentvec import FairseqContentvec from voice_changer.RVC.embedder.FairseqHubert import FairseqHubert from voice_changer.RVC.embedder.FairseqHubertJp import FairseqHubertJp +from voice_changer.RVC.embedder.OnnxContentvec import OnnxContentvec from voice_changer.utils.VoiceChangerParams import VoiceChangerParams @@ -27,10 +28,11 @@ class EmbedderManager: print("[Voice Changer] generate new embedder. (not match)") cls.currentEmbedder = cls.loadEmbedder(embederType, isHalf, dev) else: - cls.currentEmbedder.setDevice(dev) - cls.currentEmbedder.setHalf(isHalf) - # print("[Voice Changer] generate new embedder. (ANYWAY)", isHalf) - # cls.currentEmbedder = cls.loadEmbedder(embederType, file, isHalf, dev) + print("[Voice Changer] generate new embedder. (anyway)") + cls.currentEmbedder = cls.loadEmbedder(embederType, isHalf, dev) + + # cls.currentEmbedder.setDevice(dev) + # cls.currentEmbedder.setHalf(isHalf) return cls.currentEmbedder @classmethod @@ -38,13 +40,23 @@ class EmbedderManager: cls, embederType: EmbedderType, isHalf: bool, dev: device ) -> Embedder: if embederType == "hubert_base": - file = cls.params.hubert_base - return FairseqHubert().loadModel(file, dev, isHalf) + try: + file = cls.params.content_vec_500_onnx + return OnnxContentvec().loadModel(file, dev) + except Exception as e: + print(e) + file = cls.params.hubert_base + return FairseqHubert().loadModel(file, dev, isHalf) elif embederType == "hubert-base-japanese": file = cls.params.hubert_base_jp return FairseqHubertJp().loadModel(file, dev, isHalf) elif embederType == "contentvec": - file = cls.params.hubert_base - return FairseqContentvec().loadModel(file, dev, isHalf) + try: + file = cls.params.content_vec_500_onnx + return OnnxContentvec().loadModel(file, dev) + except Exception as e: + print(e) + file = cls.params.hubert_base + return FairseqContentvec().loadModel(file, dev, isHalf) else: return FairseqHubert().loadModel(file, dev, isHalf) diff --git a/server/voice_changer/RVC/embedder/EmbedderProtocol.py b/server/voice_changer/RVC/embedder/EmbedderProtocol.py new file mode 100644 index 00000000..d27ae7df --- /dev/null +++ b/server/voice_changer/RVC/embedder/EmbedderProtocol.py @@ -0,0 +1,15 @@ +from typing import Protocol + +import torch +from torch import device + + +class EmbedderProtocol(Protocol): + + def loadModel(self, file: str, dev: device, isHalf: bool = True): + ... + + def extractFeatures( + self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True + ) -> torch.Tensor: + ... diff --git a/server/voice_changer/RVC/embedder/OnnxContentvec.py b/server/voice_changer/RVC/embedder/OnnxContentvec.py new file mode 100644 index 00000000..81aa6ab7 --- /dev/null +++ b/server/voice_changer/RVC/embedder/OnnxContentvec.py @@ -0,0 +1,13 @@ +import torch +from voice_changer.RVC.embedder.Embedder import Embedder + + +class OnnxContentvec(Embedder): + + def loadModel(self, file: str, dev: torch.device) -> Embedder: + raise Exception("Not implemented") + + def extractFeatures( + self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True + ) -> torch.Tensor: + raise Exception("Not implemented") diff --git a/server/voice_changer/RVC/pipeline/Pipeline.py b/server/voice_changer/RVC/pipeline/Pipeline.py index 0de9c6df..a10755a6 100644 --- a/server/voice_changer/RVC/pipeline/Pipeline.py +++ b/server/voice_changer/RVC/pipeline/Pipeline.py @@ -209,14 +209,13 @@ class Pipeline(object): # apply silent front for inference if type(self.inferencer) in [OnnxRVCInferencer, OnnxRVCInferencerNono]: npyOffset = math.floor(silence_front * 16000) // 360 - feats = feats[:, npyOffset * 2 :, :] + feats = feats[:, npyOffset * 2 :, :] # NOQA feats_len = feats.shape[1] if pitch is not None and pitchf is not None: pitch = pitch[:, -feats_len:] pitchf = pitchf[:, -feats_len:] p_len = torch.tensor([feats_len], device=self.device).long() - # 推論実行 try: