From fef588b00b04ca46dd49036a650b91e53fb54698 Mon Sep 17 00:00:00 2001
From: wataru
Date: Sat, 20 May 2023 15:54:00 +0900
Subject: [PATCH] WIP: support RVCv2

---
 server/MMVCServerSIO.py                       | 15 ++--
 server/ModelSample.py                         | 30 ++++---
 server/const.py                               |  7 +-
 server/{samples.json => samples_0001.json}    | 61 ++++++++++----
 server/samples_0002.json                      | 79 +++++++++++++++++++
 .../voice_changer/RVC/ModelSlotGenerator.py   | 25 ++++--
 server/voice_changer/RVC/RVC.py               | 15 +++-
 server/voice_changer/RVC/SampleDownloader.py  |  8 +-
 .../RVC/inferencer/InferencerManager.py       | 12 +++
 .../RVC/inferencer/RVCInferencerNono.py       |  2 +-
 .../RVC/inferencer/RVCInferencerv2.py         | 36 +++++++++
 .../RVC/inferencer/RVCInferencerv2Nono.py     | 35 ++++++++
 .../RVC/inferencer/WebUIInferencer.py         |  2 +-
 .../RVC/inferencer/WebUIInferencerNono.py     |  2 +-
 .../voice_changer/utils/VoiceChangerParams.py |  1 -
 15 files changed, 276 insertions(+), 54 deletions(-)
 rename server/{samples.json => samples_0001.json} (82%)
 create mode 100644 server/samples_0002.json
 create mode 100644 server/voice_changer/RVC/inferencer/RVCInferencerv2.py
 create mode 100644 server/voice_changer/RVC/inferencer/RVCInferencerv2Nono.py

diff --git a/server/MMVCServerSIO.py b/server/MMVCServerSIO.py
index fb73c965..8f584023 100755
--- a/server/MMVCServerSIO.py
+++ b/server/MMVCServerSIO.py
@@ -23,7 +23,7 @@ from restapi.MMVC_Rest import MMVC_Rest
 from const import (
     NATIVE_CLIENT_FILE_MAC,
     NATIVE_CLIENT_FILE_WIN,
-    SAMPLES_JSON,
+    SAMPLES_JSONS,
     SSL_KEY_DIR,
 )
 import subprocess
@@ -50,7 +50,6 @@ def setupArgParser():
         help="generate self-signed certificate",
     )
 
-    parser.add_argument("--samples", type=str, help="path to samples")
     parser.add_argument("--model_dir", type=str, help="path to model files")
 
     parser.add_argument(
@@ -193,7 +192,6 @@ if __name__ == "MMVCServerSIO":
     mp.freeze_support()
     voiceChangerParams = VoiceChangerParams(
         model_dir=args.model_dir,
-        samples=args.samples,
        content_vec_500=args.content_vec_500,
         content_vec_500_onnx=args.content_vec_500_onnx,
         content_vec_500_onnx_on=args.content_vec_500_onnx_on,
@@ -228,13 +226,16 @@ if __name__ == "__main__":
     os.makedirs(args.model_dir, exist_ok=True)
 
     try:
-        download_no_tqdm({"url": SAMPLES_JSON, "saveTo": args.samples, "position": 0})
+        sampleJsons = []
+        for url in SAMPLES_JSONS:
+            filename = os.path.basename(url)
+            download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
+            sampleJsons.append(filename)
+        if checkRvcModelExist(args.model_dir) is False:
+            downloadInitialSampleModels(sampleJsons, args.model_dir)
     except Exception as e:
         print("[Voice Changer] loading sample failed", e)
 
-    if checkRvcModelExist(args.model_dir) is False:
-        downloadInitialSampleModels(args.samples, args.model_dir)
-
     PORT = args.p
 
     if os.getenv("EX_PORT"):
diff --git a/server/ModelSample.py b/server/ModelSample.py
index 10dec484..36e92a3d 100644
--- a/server/ModelSample.py
+++ b/server/ModelSample.py
@@ -17,22 +17,28 @@ class RVCModelSample:
     credit: str = ""
     description: str = ""
 
+    sampleRate: int = 48000
+    modelType: str = ""
+    f0: bool = True
 
-def getModelSamples(jsonPath: str, modelType: ModelType):
+
+def getModelSamples(jsonFiles: list[str], modelType: ModelType):
     try:
-        with open(jsonPath, "r", encoding="utf-8") as f:
-            jsonDict = json.load(f)
+        samples: list[RVCModelSample] = []
+        for file in jsonFiles:
+            with open(file, "r", encoding="utf-8") as f:
+                jsonDict = json.load(f)
 
-        modelList = jsonDict[modelType]
-        if modelType == "RVC":
-            samples: list[RVCModelSample] = []
-            for s in modelList:
-                modelSample = RVCModelSample(**s)
-                samples.append(modelSample)
-            return samples
+            modelList = jsonDict[modelType]
+            if modelType == "RVC":
+                for s in modelList:
+                    modelSample = RVCModelSample(**s)
+                    samples.append(modelSample)
+
+            else:
+                raise RuntimeError(f"Unknown model type {modelType}")
+        return samples
 
-        else:
-            raise RuntimeError(f"Unknown model type {modelType}")
     except Exception as e:
         print("[Voice Changer] loading sample info error:", e)
         return None
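Note: getModelSamples() now takes a list of catalog paths and merges their entries. A minimal usage sketch, assuming the catalog files have already been downloaded next to the server as in MMVCServerSIO.py above (nothing below is part of the patch itself):

import os

from const import SAMPLES_JSONS
from ModelSample import getModelSamples

# Derive local file names from the catalog URLs, the same way
# MMVCServerSIO.py does before downloading them.
sampleJsons = [os.path.basename(url) for url in SAMPLES_JSONS]

# Merge the "RVC" entries of every catalog into a single list.
samples = getModelSamples(sampleJsons, "RVC")
if samples is not None:
    for s in samples:
        print(s.id, s.sampleRate, s.modelType, s.f0)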
diff --git a/server/const.py b/server/const.py
index d0603149..428b0439 100644
--- a/server/const.py
+++ b/server/const.py
@@ -76,6 +76,8 @@ class EnumEmbedderTypes(Enum):
 class EnumInferenceTypes(Enum):
     pyTorchRVC = "pyTorchRVC"
     pyTorchRVCNono = "pyTorchRVCNono"
+    pyTorchRVCv2 = "pyTorchRVCv2"
+    pyTorchRVCv2Nono = "pyTorchRVCv2Nono"
     pyTorchWebUI = "pyTorchWebUI"
     pyTorchWebUINono = "pyTorchWebUINono"
     onnxRVC = "onnxRVC"
@@ -97,6 +99,9 @@ class ServerAudioDeviceTypes(Enum):
     audiooutput = "audiooutput"
 
 
-SAMPLES_JSON = "https://huggingface.co/wok000/vcclient_model/raw/main/samples.json"
+SAMPLES_JSONS = [
+    "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json",
+    "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json",
+]
 RVC_MODEL_DIRNAME = "rvc"
 RVC_MAX_SLOT_NUM = 5
diff --git a/server/samples.json b/server/samples_0001.json
similarity index 82%
rename from server/samples.json
rename to server/samples_0001.json
index 3503785a..d2e12edc 100644
--- a/server/samples.json
+++ b/server/samples_0001.json
@@ -10,7 +10,10 @@
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_kurage_48k_256/total_fea.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/kikoto_kurage_48k_256/term_of_use.txt",
             "credit": "黄琴海月",
-            "description": ""
+            "description": "",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
             "id": "KikotoMahiro",
@@ -22,7 +25,10 @@
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_mahiro_48k_256/total_fea.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/kikoto_mahiro_48k_256/term_of_use.txt",
             "credit": "黄琴まひろ",
-            "description": ""
+            "description": "",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
             "id": "TokinaShigure",
@@ -34,7 +40,10 @@
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tokina_sigure/total_fea.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tokina_sigure/term_of_use.txt",
             "credit": "刻鳴時雨",
-            "description": "https://huggingface.co/yasyune/Shigure_Tokina_RVC"
+            "description": "https://huggingface.co/yasyune/Shigure_Tokina_RVC",
+            "sampleRate": 40000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
             "id": "Amitaro",
@@ -46,55 +55,70 @@
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_contentvec_256/amitaro48k.0.big.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_contentvec_256/term_of_use.txt",
             "credit": "あみたろ",
-            "description": ""
+            "description": "",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
-            "id": "Amitaro_d",
+            "id": "Amitaro_768d",
             "lang": "ja-JP",
             "tag": "",
-            "name": "あみたろ(d)",
+            "name": "あみたろ(768d)",
             "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_default/amitaro_hubertjp_768_def-100.pth",
             "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_default/amitaro_hubertjp_768_def.0.index.bin",
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_default/amitaro_hubertjp_768_def.0.big.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_default/term_of_use.txt",
             "credit": "あみたろ",
-            "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用"
+            "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
-            "id": "Amitaro_n",
+            "id": "Amitaro_768n",
             "lang": "ja-JP",
             "tag": "",
-            "name": "あみたろ(n)",
+            "name": "あみたろ(768n)",
             "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp-100.pth",
             "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp.0.index.bin",
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp.0.big.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_nadare/term_of_use.txt",
             "credit": "あみたろ",
-            "description": "rinna hubertjpを使用した768次元埋め込み版。nadare様作成の事前学習モデルを使用"
+            "description": "rinna hubertjpを使用した768次元埋め込み版。nadare様作成の事前学習モデルを使用",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
-            "id": "Amitaro_t",
+            "id": "Amitaro_768t",
             "lang": "ja-JP",
             "tag": "",
-            "name": "あみたろ(t)",
+            "name": "あみたろ(768t)",
             "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor-100.pth",
             "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor.0.index.bin",
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor.0.big.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_tylor/term_of_use.txt",
             "credit": "あみたろ",
-            "description": "rinna hubertjpを使用した768次元埋め込み版。tylor様作成の事前学習モデルを使用"
+            "description": "rinna hubertjpを使用した768次元埋め込み版。tylor様作成の事前学習モデルを使用",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
-            "id": "Tsukuyomi-chan_d",
+            "id": "Tsukuyomi-chan_768d",
             "lang": "ja-JP",
             "tag": "",
-            "name": "つくよみちゃん(d)",
+            "name": "つくよみちゃん(768d)",
             "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def-100.pth",
             "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def.0.index.bin",
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def.0.big.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tsukuyomi-chan_hubertjp_768_default/term_of_use.txt",
             "credit": "つくよみちゃん",
-            "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用"
+            "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         },
         {
             "id": "Tsukuyomi-chan",
@@ -106,7 +130,10 @@
             "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi_contentvec_256/tsukuyomi48k.0.big.npy",
             "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tsukuyomi_contentvec_256/term_of_use.txt",
             "credit": "つくよみちゃん",
-            "description": ""
+            "description": "",
+            "sampleRate": 48000,
+            "modelType": "webui_v1",
+            "f0": true
         }
     ]
 }
"featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_default/amitaro_hubertjp_768_def.0.big.npy", "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_default/term_of_use.txt", "credit": "あみたろ", - "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用" + "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用", + "sampleRate": 48000, + "modelType": "webui_v1", + "f0": true }, { - "id": "Amitaro_n", + "id": "Amitaro_768n", "lang": "ja-JP", "tag": "", - "name": "あみたろ(n)", + "name": "あみたろ(768n)", "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp-100.pth", "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp.0.index.bin", "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp.0.big.npy", "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_nadare/term_of_use.txt", "credit": "あみたろ", - "description": "rinna hubertjpを使用した768次元埋め込み版。nadare様作成の事前学習モデルを使用" + "description": "rinna hubertjpを使用した768次元埋め込み版。nadare様作成の事前学習モデルを使用", + "sampleRate": 48000, + "modelType": "webui_v1", + "f0": true }, { - "id": "Amitaro_t", + "id": "Amitaro_768t", "lang": "ja-JP", "tag": "", - "name": "あみたろ(t)", + "name": "あみたろ(768t)", "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor-100.pth", "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor.0.index.bin", "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor.0.big.npy", "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_tylor/term_of_use.txt", "credit": "あみたろ", - "description": "rinna hubertjpを使用した768次元埋め込み版。tylor様作成の事前学習モデルを使用" + "description": "rinna hubertjpを使用した768次元埋め込み版。tylor様作成の事前学習モデルを使用", + "sampleRate": 48000, + "modelType": "webui_v1", + "f0": true }, { - "id": "Tsukuyomi-chan_d", + "id": "Tsukuyomi-chan_768d", "lang": "ja-JP", "tag": "", - "name": "つくよみちゃん(d)", + "name": "つくよみちゃん(768d)", "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def-100.pth", "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def.0.index.bin", "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def.0.big.npy", "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tsukuyomi-chan_hubertjp_768_default/term_of_use.txt", "credit": "つくよみちゃん", - "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用" + "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用", + "sampleRate": 48000, + "modelType": "webui_v1", + "f0": true }, { "id": "Tsukuyomi-chan", @@ -106,7 +130,10 @@ "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi_contentvec_256/tsukuyomi48k.0.big.npy", "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tsukuyomi_contentvec_256/term_of_use.txt", "credit": "つくよみちゃん", - "description": "" + "description": "", + "sampleRate": 
diff --git a/server/voice_changer/RVC/ModelSlotGenerator.py b/server/voice_changer/RVC/ModelSlotGenerator.py
index c9d5f021..1db9d426 100644
--- a/server/voice_changer/RVC/ModelSlotGenerator.py
+++ b/server/voice_changer/RVC/ModelSlotGenerator.py
@@ -56,13 +56,24 @@ def _setInfoByPytorch(slot: ModelSlot):
     config_len = len(cpt["config"])
     if config_len == 18:
         slot.f0 = True if cpt["f0"] == 1 else False
-        slot.modelType = (
-            EnumInferenceTypes.pyTorchRVC
-            if slot.f0
-            else EnumInferenceTypes.pyTorchRVCNono
-        )
-        slot.embChannels = 256
-        slot.embedder = EnumEmbedderTypes.hubert
+        version = cpt.get("version", "v1")
+        if version == "v1":
+            slot.modelType = (
+                EnumInferenceTypes.pyTorchRVC
+                if slot.f0
+                else EnumInferenceTypes.pyTorchRVCNono
+            )
+            slot.embChannels = 256
+            slot.embedder = EnumEmbedderTypes.hubert
+        else:
+            slot.modelType = (
+                EnumInferenceTypes.pyTorchRVCv2
+                if slot.f0
+                else EnumInferenceTypes.pyTorchRVCv2Nono
+            )
+            slot.embChannels = 768
+            slot.embedder = EnumEmbedderTypes.hubert
+
     else:
         slot.f0 = True if cpt["f0"] == 1 else False
         slot.modelType = (
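Note: v2 checkpoints carry a "version" key in the .pth file; v1 checkpoints predate that key, hence the cpt.get("version", "v1") default. A standalone sketch of the dispatch above, not part of the patch; "some_model.pth" is a placeholder path:

import torch

cpt = torch.load("some_model.pth", map_location="cpu")

if len(cpt["config"]) == 18:  # RVC-style checkpoint
    f0 = cpt["f0"] == 1
    if cpt.get("version", "v1") == "v1":
        # v1: 256-channel embedding
        picked = "pyTorchRVC" if f0 else "pyTorchRVCNono"
    else:
        # v2: 768-channel embedding
        picked = "pyTorchRVCv2" if f0 else "pyTorchRVCv2Nono"
else:
    # other config lengths fall through to the webui branch above
    picked = "pyTorchWebUI" if cpt["f0"] == 1 else "pyTorchWebUINono"
print(picked)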
"description": "", + "sampleRate": 40000, + "modelType": "rvc_v2", + "f0": true + } + ] +} diff --git a/server/voice_changer/RVC/ModelSlotGenerator.py b/server/voice_changer/RVC/ModelSlotGenerator.py index c9d5f021..1db9d426 100644 --- a/server/voice_changer/RVC/ModelSlotGenerator.py +++ b/server/voice_changer/RVC/ModelSlotGenerator.py @@ -56,13 +56,24 @@ def _setInfoByPytorch(slot: ModelSlot): config_len = len(cpt["config"]) if config_len == 18: slot.f0 = True if cpt["f0"] == 1 else False - slot.modelType = ( - EnumInferenceTypes.pyTorchRVC - if slot.f0 - else EnumInferenceTypes.pyTorchRVCNono - ) - slot.embChannels = 256 - slot.embedder = EnumEmbedderTypes.hubert + version = cpt.get("version", "v1") + if version == "v1": + slot.modelType = ( + EnumInferenceTypes.pyTorchRVC + if slot.f0 + else EnumInferenceTypes.pyTorchRVCNono + ) + slot.embChannels = 256 + slot.embedder = EnumEmbedderTypes.hubert + else: + slot.modelType = ( + EnumInferenceTypes.pyTorchRVCv2 + if slot.f0 + else EnumInferenceTypes.pyTorchRVCv2Nono + ) + slot.embChannels = 768 + slot.embedder = EnumEmbedderTypes.hubert + else: slot.f0 = True if cpt["f0"] == 1 else False slot.modelType = ( diff --git a/server/voice_changer/RVC/RVC.py b/server/voice_changer/RVC/RVC.py index 18c34b7d..6dbc2ea1 100644 --- a/server/voice_changer/RVC/RVC.py +++ b/server/voice_changer/RVC/RVC.py @@ -37,7 +37,7 @@ from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.pipeline.Pipeline import Pipeline from Exceptions import NoModeLoadedException -from const import RVC_MAX_SLOT_NUM, RVC_MODEL_DIRNAME, UPLOAD_DIR +from const import RVC_MAX_SLOT_NUM, RVC_MODEL_DIRNAME, SAMPLES_JSONS, UPLOAD_DIR import shutil import json @@ -60,7 +60,7 @@ class RVC: audio_buffer: AudioInOut | None = None prevVol: float = 0 params: VoiceChangerParams - currentSlot: int = -1 + currentSlot: int = 0 needSwitch: bool = False def __init__(self, params: VoiceChangerParams): @@ -72,7 +72,11 @@ class RVC: self.loadSlots() print("RVC initialization: ", params) - sampleModels = getModelSamples(params.samples, "RVC") + sampleJsons: list[str] = [] + for url in SAMPLES_JSONS: + filename = os.path.basename(url) + sampleJsons.append(filename) + sampleModels = getModelSamples(sampleJsons, "RVC") if sampleModels is not None: self.settings.sampleModels = sampleModels @@ -122,7 +126,9 @@ class RVC: params["name"] = sampleInfo.name params["sampleId"] = sampleInfo.id params["termsOfUseUrl"] = sampleInfo.termsOfUseUrl - + params["sampleRate"] = sampleInfo.sampleRate + params["modelType"] = sampleInfo.modelType + params["f0"] = sampleInfo.f0 # メタデータを見て、永続化モデルフォルダに移動させる # その際に、メタデータのファイル格納場所も書き換える slotDir = os.path.join( @@ -220,6 +226,7 @@ class RVC: def prepareModel(self, slot: int): if slot < 0: + print("[Voice Changer] Prepare Model of slot skip:", slot) return self.get_info() modelSlot = self.settings.modelSlots[slot] diff --git a/server/voice_changer/RVC/SampleDownloader.py b/server/voice_changer/RVC/SampleDownloader.py index 57a2020d..309d89f0 100644 --- a/server/voice_changer/RVC/SampleDownloader.py +++ b/server/voice_changer/RVC/SampleDownloader.py @@ -14,14 +14,14 @@ def checkRvcModelExist(model_dir: str): return True -def downloadInitialSampleModels(sampleJson: str, model_dir: str): +def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str): sampleModelIds = [ "KikotoMahiro", "TokinaShigure", "Amitaro", "Tsukuyomi-chan", ] - sampleModels = getModelSamples(sampleJson, "RVC") + sampleModels = getModelSamples(sampleJsons, "RVC") if 
diff --git a/server/voice_changer/RVC/inferencer/InferencerManager.py b/server/voice_changer/RVC/inferencer/InferencerManager.py
index f59c69ee..94dc4cfe 100644
--- a/server/voice_changer/RVC/inferencer/InferencerManager.py
+++ b/server/voice_changer/RVC/inferencer/InferencerManager.py
@@ -6,6 +6,8 @@ from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
 from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
 from voice_changer.RVC.inferencer.RVCInferencer import RVCInferencer
 from voice_changer.RVC.inferencer.RVCInferencerNono import RVCInferencerNono
+from voice_changer.RVC.inferencer.RVCInferencerv2 import RVCInferencerv2
+from voice_changer.RVC.inferencer.RVCInferencerv2Nono import RVCInferencerv2Nono
 from voice_changer.RVC.inferencer.WebUIInferencer import WebUIInferencer
 from voice_changer.RVC.inferencer.WebUIInferencerNono import WebUIInferencerNono
 
@@ -34,6 +36,16 @@
             or inferencerType == EnumInferenceTypes.pyTorchRVCNono.value
         ):
             return RVCInferencerNono().loadModel(file, dev, isHalf)
+        elif (
+            inferencerType == EnumInferenceTypes.pyTorchRVCv2
+            or inferencerType == EnumInferenceTypes.pyTorchRVCv2.value
+        ):
+            return RVCInferencerv2().loadModel(file, dev, isHalf)
+        elif (
+            inferencerType == EnumInferenceTypes.pyTorchRVCv2Nono
+            or inferencerType == EnumInferenceTypes.pyTorchRVCv2Nono.value
+        ):
+            return RVCInferencerv2Nono().loadModel(file, dev, isHalf)
         elif (
             inferencerType == EnumInferenceTypes.pyTorchWebUI
             or inferencerType == EnumInferenceTypes.pyTorchWebUI.value
diff --git a/server/voice_changer/RVC/inferencer/RVCInferencerNono.py b/server/voice_changer/RVC/inferencer/RVCInferencerNono.py
index f84d9c3b..f30d9531 100644
--- a/server/voice_changer/RVC/inferencer/RVCInferencerNono.py
+++ b/server/voice_changer/RVC/inferencer/RVCInferencerNono.py
@@ -10,7 +10,7 @@ from infer_pack.models import (  # type:ignore
 
 class RVCInferencerNono(Inferencer):
     def loadModel(self, file: str, dev: device, isHalf: bool = True):
-        super().setProps(EnumInferenceTypes.pyTorchRVC, file, dev, isHalf)
+        super().setProps(EnumInferenceTypes.pyTorchRVCNono, file, dev, isHalf)
 
         cpt = torch.load(file, map_location="cpu")
         model = SynthesizerTrnMs256NSFsid_nono(*cpt["config"], is_half=isHalf)
SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=isHalf) + + model.eval() + model.load_state_dict(cpt["weight"], strict=False) + + model = model.to(dev) + if isHalf: + model = model.half() + + self.model = model + return self + + def infer( + self, + feats: torch.Tensor, + pitch_length: torch.Tensor, + pitch: torch.Tensor, + pitchf: torch.Tensor, + sid: torch.Tensor, + ) -> torch.Tensor: + return self.model.infer(feats, pitch_length, pitch, pitchf, sid) diff --git a/server/voice_changer/RVC/inferencer/RVCInferencerv2Nono.py b/server/voice_changer/RVC/inferencer/RVCInferencerv2Nono.py new file mode 100644 index 00000000..773ebe44 --- /dev/null +++ b/server/voice_changer/RVC/inferencer/RVCInferencerv2Nono.py @@ -0,0 +1,35 @@ +import torch +from torch import device + +from const import EnumInferenceTypes +from voice_changer.RVC.inferencer.Inferencer import Inferencer +from infer_pack.models import ( # type:ignore + SynthesizerTrnMs768NSFsid_nono, +) + + +class RVCInferencerv2Nono(Inferencer): + def loadModel(self, file: str, dev: device, isHalf: bool = True): + super().setProps(EnumInferenceTypes.pyTorchRVCv2Nono, file, dev, isHalf) + cpt = torch.load(file, map_location="cpu") + model = SynthesizerTrnMs768NSFsid_nono(*cpt["config"], is_half=isHalf) + + model.eval() + model.load_state_dict(cpt["weight"], strict=False) + + model = model.to(dev) + if isHalf: + model = model.half() + + self.model = model + return self + + def infer( + self, + feats: torch.Tensor, + pitch_length: torch.Tensor, + pitch: torch.Tensor | None, + pitchf: torch.Tensor | None, + sid: torch.Tensor, + ) -> torch.Tensor: + return self.model.infer(feats, pitch_length, sid) diff --git a/server/voice_changer/RVC/inferencer/WebUIInferencer.py b/server/voice_changer/RVC/inferencer/WebUIInferencer.py index a9d3b0a8..01fb5608 100644 --- a/server/voice_changer/RVC/inferencer/WebUIInferencer.py +++ b/server/voice_changer/RVC/inferencer/WebUIInferencer.py @@ -8,7 +8,7 @@ from .models import SynthesizerTrnMsNSFsid class WebUIInferencer(Inferencer): def loadModel(self, file: str, dev: device, isHalf: bool = True): - super().setProps(EnumInferenceTypes.pyTorchRVC, file, dev, isHalf) + super().setProps(EnumInferenceTypes.pyTorchWebUI, file, dev, isHalf) cpt = torch.load(file, map_location="cpu") model = SynthesizerTrnMsNSFsid(**cpt["params"], is_half=isHalf) diff --git a/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py b/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py index c17465cc..044a0fcf 100644 --- a/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py +++ b/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py @@ -8,7 +8,7 @@ from .models import SynthesizerTrnMsNSFsidNono class WebUIInferencerNono(Inferencer): def loadModel(self, file: str, dev: device, isHalf: bool = True): - super().setProps(EnumInferenceTypes.pyTorchRVC, file, dev, isHalf) + super().setProps(EnumInferenceTypes.pyTorchWebUINono, file, dev, isHalf) cpt = torch.load(file, map_location="cpu") model = SynthesizerTrnMsNSFsidNono(**cpt["params"], is_half=isHalf) diff --git a/server/voice_changer/utils/VoiceChangerParams.py b/server/voice_changer/utils/VoiceChangerParams.py index 77a2ead2..8b9e83a4 100644 --- a/server/voice_changer/utils/VoiceChangerParams.py +++ b/server/voice_changer/utils/VoiceChangerParams.py @@ -4,7 +4,6 @@ from dataclasses import dataclass @dataclass class VoiceChangerParams: model_dir: str - samples: str content_vec_500: str content_vec_500_onnx: str content_vec_500_onnx_on: bool