WIP:common sample

wataru 2023-06-16 18:04:33 +09:00
parent 24b55116f2
commit 435699d387
7 changed files with 31 additions and 303 deletions

View File

@@ -107,9 +107,9 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
], [
("TokinaShigure_o", {"useIndex": True}),
("KikotoMahiro_o", {"useIndex": False}),
("Amitaro_o", {"useIndex": False}),
# ("TokinaShigure_o", {"useIndex": True}),
# ("KikotoMahiro_o", {"useIndex": False}),
# ("Amitaro_o", {"useIndex": False}),
("Tsukuyomi-chan_o", {"useIndex": False}),
]
elif mode == "testOfficial":
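For context, getSampleJsonAndModelIds appears to return a pair of lists — sample-JSON URLs and (sampleId, options) tuples — and this hunk leaves only Tsukuyomi-chan_o enabled. A minimal consumption sketch (the mode string is an assumption):

sampleJsons, sampleModelIds = getSampleJsonAndModelIds("production")  # mode value assumed
for sampleId, options in sampleModelIds:
    print(sampleId, options["useIndex"])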

View File

@@ -1,28 +0,0 @@
from const import EnumInferenceTypes, EnumEmbedderTypes
from dataclasses import dataclass
@dataclass
class ModelSlot:
modelFile: str = ""
indexFile: str = ""
defaultTune: int = 0
defaultIndexRatio: int = 1
defaultProtect: float = 0.5
isONNX: bool = False
modelType: str = EnumInferenceTypes.pyTorchRVC.value
samplingRate: int = -1
f0: bool = True
embChannels: int = 256
embOutputLayer: int = 9
useFinalProj: bool = True
deprecated: bool = False
embedder: str = EnumEmbedderTypes.hubert.value
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
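This per-backend ModelSlot is deleted; judging from the imports in the diffs below (from data.ModelSlot import ModelSlot and from data.ModelSlot import RVCModelSlot, loadAllSlotInfo), it moves to a shared data/ModelSlot.py. A rough sketch of what that module presumably looks like — the base/RVC field split is an assumption, and loadAllSlotInfo is omitted:

# Hypothetical sketch of the new data/ModelSlot.py (not shown in this commit).
from dataclasses import dataclass
from const import EnumInferenceTypes, EnumEmbedderTypes


@dataclass
class ModelSlot:
    voiceChangerType: str = ""  # e.g. "RVC"; set in RVC.loadModel below
    modelFile: str = ""
    iconFile: str = ""
    name: str = ""
    description: str = ""
    credit: str = ""
    termsOfUseUrl: str = ""
    sampleId: str = ""


@dataclass
class RVCModelSlot(ModelSlot):
    indexFile: str = ""
    defaultTune: int = 0
    defaultIndexRatio: float = 1.0
    defaultProtect: float = 0.5
    isONNX: bool = False
    modelType: str = EnumInferenceTypes.pyTorchRVC.value
    samplingRate: int = -1
    f0: bool = True
    embChannels: int = 256
    embOutputLayer: int = 9
    useFinalProj: bool = True
    deprecated: bool = False
    embedder: str = EnumEmbedderTypes.hubert.value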

View File

@@ -1,10 +1,11 @@
from const import EnumEmbedderTypes, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
import torch
import onnxruntime
import json
from data.ModelSlot import ModelSlot
def _setInfoByPytorch(slot: ModelSlot):
cpt = torch.load(slot.modelFile, map_location="cpu")
@@ -15,22 +16,14 @@ def _setInfoByPytorch(slot: ModelSlot):
slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1")
if version is None or version == "v1":
slot.modelType = (
EnumInferenceTypes.pyTorchRVC.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCNono.value
)
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
slot.embChannels = 256
slot.embOutputLayer = 9
slot.useFinalProj = True
slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v1")
else:
slot.modelType = (
EnumInferenceTypes.pyTorchRVCv2.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCv2Nono.value
)
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
slot.embChannels = 768
slot.embOutputLayer = 12
slot.useFinalProj = False
@@ -40,37 +33,21 @@ def _setInfoByPytorch(slot: ModelSlot):
else:
# DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = (
EnumInferenceTypes.pyTorchWebUI.value
if slot.f0
else EnumInferenceTypes.pyTorchWebUINono.value
)
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
slot.embChannels = cpt["config"][17]
slot.embOutputLayer = (
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
)
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Show DDPN model info
if (
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
elif (
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
else:
print(
f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
@@ -91,9 +68,7 @@ def _setInfoByPytorch(slot: ModelSlot):
def _setInfoByONNX(slot: ModelSlot):
tmp_onnx_session = onnxruntime.InferenceSession(
slot.modelFile, providers=["CPUExecutionProvider"]
)
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
modelmeta = tmp_onnx_session.get_modelmeta()
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
@@ -101,16 +76,8 @@ def _setInfoByONNX(slot: ModelSlot):
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
slot.embOutputLayer = (
metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
)
slot.useFinalProj = (
metadata["useFinalProj"]
if "useFinalProj" in metadata
else True
if slot.embChannels == 256
else False
)
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
if slot.embChannels == 256:
slot.useFinalProj = True
@@ -118,22 +85,12 @@ def _setInfoByONNX(slot: ModelSlot):
slot.useFinalProj = False
# Show ONNX model info
if (
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] ONNX Model: Official v1 like")
elif (
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] ONNX Model: Official v2 like")
else:
print(
f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert.value
@@ -149,11 +106,7 @@ def _setInfoByONNX(slot: ModelSlot):
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"]
slot.modelType = (
EnumInferenceTypes.onnxRVC.value
if slot.f0
else EnumInferenceTypes.onnxRVCNono.value
)
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False
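Together the two helpers populate a slot's inference metadata from the model file. A hypothetical driver, dispatching on the file extension exactly as RVC.loadModel does below (import paths and field placement are assumptions):

from data.ModelSlot import RVCModelSlot
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch

slot = RVCModelSlot(modelFile="model.onnx")
if slot.modelFile.endswith(".onnx"):  # same check RVC.loadModel uses
    _setInfoByONNX(slot)
else:
    _setInfoByPytorch(slot)
print(slot.modelType, slot.embChannels, slot.samplingRate)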

View File

@@ -5,10 +5,8 @@ from typing import cast
import numpy as np
import torch
import torchaudio
from data.ModelSlot import loadAllSlotInfo
from data.ModelSlot import RVCModelSlot, loadAllSlotInfo
from utils.downloader.SampleDownloader import getSampleInfos
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.SampleDownloader import downloadModelFiles
# avoiding parse arg error in RVC
@@ -102,41 +100,16 @@ class RVC:
def loadModel(self, props: LoadModelParams):
target_slot_idx = props.slot
params = props.params
slotInfo: ModelSlot = ModelSlot()
slotInfo: RVCModelSlot = RVCModelSlot()
print("loadModel", params)
# When a sample is specified, download it and fabricate the metadata
if len(params["sampleId"]) > 0:
sampleId = params["sampleId"]
sampleInfo = self.getSampleInfo(sampleId)
useIndex = params["rvcIndexDownload"]
if sampleInfo is None:
print("[Voice Changer] sampleInfo is None")
return
modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
slotInfo.modelFile = modelPath
if indexPath is not None:
slotInfo.indexFile = indexPath
if iconPath is not None:
slotInfo.iconFile = iconPath
slotInfo.sampleId = sampleInfo.id
slotInfo.credit = sampleInfo.credit
slotInfo.description = sampleInfo.description
slotInfo.name = sampleInfo.name
slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
# slotInfo.samplingRate = sampleInfo.sampleRate
# slotInfo.modelType = sampleInfo.modelType
# slotInfo.f0 = sampleInfo.f0
else:
slotInfo.modelFile = params["files"]["rvcModel"]
slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
slotInfo.modelFile = params["files"]["rvcModel"]
slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
slotInfo.defaultTune = params["defaultTune"]
slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
slotInfo.defaultProtect = params["defaultProtect"]
slotInfo.voiceChangerType = "RVC"
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
if slotInfo.isONNX:
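For reference, a sketch of the params payload that loadModel reads, reconstructed from the key accesses above (values are placeholders, not defaults from the source):

params = {
    "sampleId": "",                          # non-empty triggers the sample-download branch
    "rvcIndexDownload": True,                # read only in the sample branch
    "files": {
        "rvcModel": "/path/to/model.pth",
        "rvcIndex": "/path/to/added.index",  # optional
    },
    "defaultTune": 0,
    "defaultIndexRatio": 1.0,
    "defaultProtect": 0.5,
}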

View File

@@ -1,164 +0,0 @@
from concurrent.futures import ThreadPoolExecutor
from dataclasses import asdict
import os
from const import RVC_MODEL_DIRNAME, TMP_DIR
from Downloader import download, download_no_tqdm
from ModelSample import RVCModelSample, getModelSamples
import json
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
def checkRvcModelExist(model_dir: str):
rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME)
if not os.path.exists(rvcModelDir):
return False
return True
def downloadInitialSampleModels(sampleJsons: list[str], sampleModelIds: list[str], model_dir: str):
sampleModels = getModelSamples(sampleJsons, "RVC")
if sampleModels is None:
return
downloadParams = []
slot_count = 0
line_num = 0
for initSampleId in sampleModelIds:
# Search for the initial sample
match = False
for sample in sampleModels:
if sample.id == initSampleId[0]:
match = True
break
if match is False:
print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
continue
# Once a match is found...
slotInfo: ModelSlot = ModelSlot()
# sampleParams: Any = {"files": {}}
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
os.makedirs(slotDir, exist_ok=True)
modelFilePath = os.path.join(
slotDir,
os.path.basename(sample.modelUrl),
)
downloadParams.append(
{
"url": sample.modelUrl,
"saveTo": modelFilePath,
"position": line_num,
}
)
slotInfo.modelFile = modelFilePath
line_num += 1
if initSampleId[1] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
indexPath = os.path.join(
slotDir,
os.path.basename(sample.indexUrl),
)
downloadParams.append(
{
"url": sample.indexUrl,
"saveTo": indexPath,
"position": line_num,
}
)
slotInfo.indexFile = indexPath
line_num += 1
if hasattr(sample, "icon") and sample.icon != "":
iconPath = os.path.join(
slotDir,
os.path.basename(sample.icon),
)
downloadParams.append(
{
"url": sample.icon,
"saveTo": iconPath,
"position": line_num,
}
)
slotInfo.iconFile = iconPath
line_num += 1
slotInfo.sampleId = sample.id
slotInfo.credit = sample.credit
slotInfo.description = sample.description
slotInfo.name = sample.name
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 0.0
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
# At this point the files have not been downloaded yet
# if slotInfo.isONNX:
# _setInfoByONNX(slotInfo)
# else:
# _setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
slot_count += 1
# Download
print("[Voice Changer] Downloading model files...")
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)
# Generate metadata
print("[Voice Changer] Generating metadata...")
for slotId in range(slot_count):
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
slotInfo = ModelSlot(**jsonDict)
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
downloadParams = []
modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
downloadParams.append(
{
"url": sampleInfo.modelUrl,
"saveTo": modelPath,
"position": 0,
}
)
indexPath = None
if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
print("[Voice Changer] Download sample with index.")
indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
downloadParams.append(
{
"url": sampleInfo.indexUrl,
"saveTo": indexPath,
"position": 1,
}
)
iconPath = None
if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
downloadParams.append(
{
"url": sampleInfo.icon,
"saveTo": iconPath,
"position": 2,
}
)
print("[Voice Changer] Downloading model files...", end="")
with ThreadPoolExecutor() as pool:
pool.map(download_no_tqdm, downloadParams)
print("")
return modelPath, indexPath, iconPath
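The whole module is deleted (its duties apparently move elsewhere, per the utils.downloader import in the RVC.py diff above). The download/download_no_tqdm helpers it hands tasks to are not shown; a minimal stand-in, assuming they simply stream url to saveTo with position selecting a progress-bar row:

import os
import requests  # assumption: the real Downloader may use a different HTTP client

def download_sketch(params: dict):
    # Hypothetical stand-in for Downloader.download_no_tqdm: stream the URL
    # to disk; the real helper also draws a tqdm bar at params["position"].
    os.makedirs(os.path.dirname(params["saveTo"]) or ".", exist_ok=True)
    resp = requests.get(params["url"], stream=True, timeout=60)
    resp.raise_for_status()
    with open(params["saveTo"], "wb") as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):
            f.write(chunk)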

View File

@@ -4,7 +4,7 @@ import torch
from onnxsim import simplify
import onnx
from const import TMP_DIR, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
from data.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
SynthesizerTrnMs256NSFsid_ONNX,
@@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
modelFile = modelSlot.modelFile
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
output_file_simple = (
os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
)
output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
output_path = os.path.join(TMP_DIR, output_file)
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
metadata = {
@@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
if gpuMomory > 0:
_export2onnx(modelFile, output_path, output_path_simple, True, metadata)
else:
print(
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
)
print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
_export2onnx(modelFile, output_path, output_path_simple, False, metadata)
return output_file_simple
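The onnxsim import at the top of this file implies a simplification pass inside _export2onnx, which the diff does not show; the standard onnxsim sequence would be roughly:

# Assumption: _export2onnx does something like this after exporting output_path.
model = onnx.load(output_path)
model_simplified, check_ok = simplify(model)
if not check_ok:
    raise RuntimeError("simplified ONNX model failed validation")
onnx.save(model_simplified, output_path_simple)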

View File

@@ -1,8 +1,8 @@
import os
import traceback
import faiss
from data.ModelSlot import RVCModelSlot
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
@@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
def createPipeline(modelSlot: RVCModelSlot, gpu: int, f0Detector: str):
dev = DeviceManager.get_instance().getDevice(gpu)
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
# Create the Inferencer
try:
inferencer = InferencerManager.getInferencer(
modelSlot.modelType, modelSlot.modelFile, gpu
)
inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelSlot.modelFile, gpu)
except Exception as e:
print("[Voice Changer] exception! loading inferencer", e)
traceback.print_exc()
@@ -54,7 +52,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
return pipeline
def _loadIndex(modelSlot: ModelSlot):
def _loadIndex(modelSlot: RVCModelSlot):
# Load the index
print("[Voice Changer] Loading index...")
# None if no file is specified
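The hunk is truncated here; given the faiss import at the top of the file, the load that presumably follows is the standard call (a sketch, since the body is cut off):

def _loadIndex_sketch(modelSlot: RVCModelSlot):
    # Hypothetical continuation: return None when no index file is set or present.
    if not modelSlot.indexFile or not os.path.exists(modelSlot.indexFile):
        return None
    return faiss.read_index(modelSlot.indexFile)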