WIP:common sample

wataru 2023-06-16 18:04:33 +09:00
parent 24b55116f2
commit 435699d387
7 changed files with 31 additions and 303 deletions

View File

@@ -107,9 +107,9 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
], [
("TokinaShigure_o", {"useIndex": True}),
("KikotoMahiro_o", {"useIndex": False}),
("Amitaro_o", {"useIndex": False}),
# ("TokinaShigure_o", {"useIndex": True}),
# ("KikotoMahiro_o", {"useIndex": False}),
# ("Amitaro_o", {"useIndex": False}),
("Tsukuyomi-chan_o", {"useIndex": False}),
]
elif mode == "testOfficial":
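For context, getSampleJsonAndModelIds appears to return a pair of lists — sample-JSON URLs and (sampleId, options) tuples — and this hunk leaves only Tsukuyomi-chan_o enabled. A minimal consumption sketch (the mode string is an assumption):

sampleJsons, sampleModelIds = getSampleJsonAndModelIds("production")  # mode value assumed
for sampleId, options in sampleModelIds:
    print(sampleId, options["useIndex"])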

View File

@@ -1,28 +0,0 @@
from const import EnumInferenceTypes, EnumEmbedderTypes
from dataclasses import dataclass
@dataclass
class ModelSlot:
modelFile: str = ""
indexFile: str = ""
defaultTune: int = 0
defaultIndexRatio: int = 1
defaultProtect: float = 0.5
isONNX: bool = False
modelType: str = EnumInferenceTypes.pyTorchRVC.value
samplingRate: int = -1
f0: bool = True
embChannels: int = 256
embOutputLayer: int = 9
useFinalProj: bool = True
deprecated: bool = False
embedder: str = EnumEmbedderTypes.hubert.value
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
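This per-backend ModelSlot is deleted; judging from the imports in the diffs below (from data.ModelSlot import ModelSlot and from data.ModelSlot import RVCModelSlot, loadAllSlotInfo), it moves to a shared data/ModelSlot.py. A rough sketch of what that module presumably looks like — the base/RVC field split is an assumption, and loadAllSlotInfo is omitted:

# Hypothetical sketch of the new data/ModelSlot.py (not shown in this commit).
from dataclasses import dataclass
from const import EnumInferenceTypes, EnumEmbedderTypes


@dataclass
class ModelSlot:
    voiceChangerType: str = ""  # e.g. "RVC"; set in RVC.loadModel below
    modelFile: str = ""
    iconFile: str = ""
    name: str = ""
    description: str = ""
    credit: str = ""
    termsOfUseUrl: str = ""
    sampleId: str = ""


@dataclass
class RVCModelSlot(ModelSlot):
    indexFile: str = ""
    defaultTune: int = 0
    defaultIndexRatio: float = 1.0
    defaultProtect: float = 0.5
    isONNX: bool = False
    modelType: str = EnumInferenceTypes.pyTorchRVC.value
    samplingRate: int = -1
    f0: bool = True
    embChannels: int = 256
    embOutputLayer: int = 9
    useFinalProj: bool = True
    deprecated: bool = False
    embedder: str = EnumEmbedderTypes.hubert.value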

View File

@@ -1,10 +1,11 @@
from const import EnumEmbedderTypes, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
import torch
import onnxruntime
import json
from data.ModelSlot import ModelSlot
def _setInfoByPytorch(slot: ModelSlot):
cpt = torch.load(slot.modelFile, map_location="cpu")
@@ -15,22 +16,14 @@ def _setInfoByPytorch(slot: ModelSlot):
slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1")
if version is None or version == "v1":
slot.modelType = (
EnumInferenceTypes.pyTorchRVC.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCNono.value
)
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
slot.embChannels = 256
slot.embOutputLayer = 9
slot.useFinalProj = True
slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v1")
else:
slot.modelType = (
EnumInferenceTypes.pyTorchRVCv2.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCv2Nono.value
)
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
slot.embChannels = 768
slot.embOutputLayer = 12
slot.useFinalProj = False
@@ -40,37 +33,21 @@ def _setInfoByPytorch(slot: ModelSlot):
else:
# DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = (
EnumInferenceTypes.pyTorchWebUI.value
if slot.f0
else EnumInferenceTypes.pyTorchWebUINono.value
)
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
slot.embChannels = cpt["config"][17]
slot.embOutputLayer = (
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
)
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Show DDPN model info
if (
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
elif (
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
else:
print(
f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
@@ -91,9 +68,7 @@ def _setInfoByPytorch(slot: ModelSlot):
def _setInfoByONNX(slot: ModelSlot):
tmp_onnx_session = onnxruntime.InferenceSession(
slot.modelFile, providers=["CPUExecutionProvider"]
)
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
modelmeta = tmp_onnx_session.get_modelmeta()
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
@@ -101,16 +76,8 @@ def _setInfoByONNX(slot: ModelSlot):
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
slot.embOutputLayer = (
metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
)
slot.useFinalProj = (
metadata["useFinalProj"]
if "useFinalProj" in metadata
else True
if slot.embChannels == 256
else False
)
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
if slot.embChannels == 256:
slot.useFinalProj = True
@@ -118,22 +85,12 @@ def _setInfoByONNX(slot: ModelSlot):
slot.useFinalProj = False
# Show ONNX model info
if (
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] ONNX Model: Official v1 like")
elif (
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] ONNX Model: Official v2 like")
else:
print(
f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert.value
@@ -149,11 +106,7 @@ def _setInfoByONNX(slot: ModelSlot):
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"]
slot.modelType = (
EnumInferenceTypes.onnxRVC.value
if slot.f0
else EnumInferenceTypes.onnxRVCNono.value
)
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False
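Together the two helpers populate a slot's inference metadata from the model file. A hypothetical driver, dispatching on the file extension exactly as RVC.loadModel does below (import paths and field placement are assumptions):

from data.ModelSlot import RVCModelSlot
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch

slot = RVCModelSlot(modelFile="model.onnx")
if slot.modelFile.endswith(".onnx"):  # same check RVC.loadModel uses
    _setInfoByONNX(slot)
else:
    _setInfoByPytorch(slot)
print(slot.modelType, slot.embChannels, slot.samplingRate)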

View File

@@ -5,10 +5,8 @@ from typing import cast
import numpy as np
import torch
import torchaudio
from data.ModelSlot import loadAllSlotInfo
from data.ModelSlot import RVCModelSlot, loadAllSlotInfo
from utils.downloader.SampleDownloader import getSampleInfos
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.SampleDownloader import downloadModelFiles
# avoiding parse arg error in RVC
@@ -102,41 +100,16 @@ class RVC:
def loadModel(self, props: LoadModelParams):
target_slot_idx = props.slot
params = props.params
slotInfo: ModelSlot = ModelSlot()
slotInfo: RVCModelSlot = RVCModelSlot()
print("loadModel", params)
# When a sample is specified, download it and fabricate the metadata
if len(params["sampleId"]) > 0:
sampleId = params["sampleId"]
sampleInfo = self.getSampleInfo(sampleId)
useIndex = params["rvcIndexDownload"]
if sampleInfo is None:
print("[Voice Changer] sampleInfo is None")
return
modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
slotInfo.modelFile = modelPath
if indexPath is not None:
slotInfo.indexFile = indexPath
if iconPath is not None:
slotInfo.iconFile = iconPath
slotInfo.sampleId = sampleInfo.id
slotInfo.credit = sampleInfo.credit
slotInfo.description = sampleInfo.description
slotInfo.name = sampleInfo.name
slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
# slotInfo.samplingRate = sampleInfo.sampleRate
# slotInfo.modelType = sampleInfo.modelType
# slotInfo.f0 = sampleInfo.f0
else:
slotInfo.modelFile = params["files"]["rvcModel"]
slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
slotInfo.modelFile = params["files"]["rvcModel"]
slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
slotInfo.defaultTune = params["defaultTune"]
slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
slotInfo.defaultProtect = params["defaultProtect"]
slotInfo.voiceChangerType = "RVC"
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
if slotInfo.isONNX:
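For reference, a sketch of the params payload that loadModel reads, reconstructed from the key accesses above (values are placeholders, not defaults from the source):

params = {
    "sampleId": "",                          # non-empty triggers the sample-download branch
    "rvcIndexDownload": True,                # read only in the sample branch
    "files": {
        "rvcModel": "/path/to/model.pth",
        "rvcIndex": "/path/to/added.index",  # optional
    },
    "defaultTune": 0,
    "defaultIndexRatio": 1.0,
    "defaultProtect": 0.5,
}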

View File

@@ -1,164 +0,0 @@
from concurrent.futures import ThreadPoolExecutor
from dataclasses import asdict
import os
from const import RVC_MODEL_DIRNAME, TMP_DIR
from Downloader import download, download_no_tqdm
from ModelSample import RVCModelSample, getModelSamples
import json
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
def checkRvcModelExist(model_dir: str):
rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME)
if not os.path.exists(rvcModelDir):
return False
return True
def downloadInitialSampleModels(sampleJsons: list[str], sampleModelIds: list[str], model_dir: str):
sampleModels = getModelSamples(sampleJsons, "RVC")
if sampleModels is None:
return
downloadParams = []
slot_count = 0
line_num = 0
for initSampleId in sampleModelIds:
# Search for the initial sample
match = False
for sample in sampleModels:
if sample.id == initSampleId[0]:
match = True
break
if match is False:
print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
continue
# Once a match is found...
slotInfo: ModelSlot = ModelSlot()
# sampleParams: Any = {"files": {}}
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
os.makedirs(slotDir, exist_ok=True)
modelFilePath = os.path.join(
slotDir,
os.path.basename(sample.modelUrl),
)
downloadParams.append(
{
"url": sample.modelUrl,
"saveTo": modelFilePath,
"position": line_num,
}
)
slotInfo.modelFile = modelFilePath
line_num += 1
if initSampleId[1] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
indexPath = os.path.join(
slotDir,
os.path.basename(sample.indexUrl),
)
downloadParams.append(
{
"url": sample.indexUrl,
"saveTo": indexPath,
"position": line_num,
}
)
slotInfo.indexFile = indexPath
line_num += 1
if hasattr(sample, "icon") and sample.icon != "":
iconPath = os.path.join(
slotDir,
os.path.basename(sample.icon),
)
downloadParams.append(
{
"url": sample.icon,
"saveTo": iconPath,
"position": line_num,
}
)
slotInfo.iconFile = iconPath
line_num += 1
slotInfo.sampleId = sample.id
slotInfo.credit = sample.credit
slotInfo.description = sample.description
slotInfo.name = sample.name
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 0.0
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
# At this point the files have not been downloaded yet
# if slotInfo.isONNX:
# _setInfoByONNX(slotInfo)
# else:
# _setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
slot_count += 1
# Download
print("[Voice Changer] Downloading model files...")
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)
# Generate metadata
print("[Voice Changer] Generating metadata...")
for slotId in range(slot_count):
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
slotInfo = ModelSlot(**jsonDict)
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
downloadParams = []
modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
downloadParams.append(
{
"url": sampleInfo.modelUrl,
"saveTo": modelPath,
"position": 0,
}
)
indexPath = None
if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
print("[Voice Changer] Download sample with index.")
indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
downloadParams.append(
{
"url": sampleInfo.indexUrl,
"saveTo": indexPath,
"position": 1,
}
)
iconPath = None
if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
downloadParams.append(
{
"url": sampleInfo.icon,
"saveTo": iconPath,
"position": 2,
}
)
print("[Voice Changer] Downloading model files...", end="")
with ThreadPoolExecutor() as pool:
pool.map(download_no_tqdm, downloadParams)
print("")
return modelPath, indexPath, iconPath
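The whole module is deleted (its duties apparently move elsewhere, per the utils.downloader import in the RVC.py diff above). The download/download_no_tqdm helpers it hands tasks to are not shown; a minimal stand-in, assuming they simply stream url to saveTo with position selecting a progress-bar row:

import os
import requests  # assumption: the real Downloader may use a different HTTP client

def download_sketch(params: dict):
    # Hypothetical stand-in for Downloader.download_no_tqdm: stream the URL
    # to disk; the real helper also draws a tqdm bar at params["position"].
    os.makedirs(os.path.dirname(params["saveTo"]) or ".", exist_ok=True)
    resp = requests.get(params["url"], stream=True, timeout=60)
    resp.raise_for_status()
    with open(params["saveTo"], "wb") as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):
            f.write(chunk)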

View File

@@ -4,7 +4,7 @@ import torch
from onnxsim import simplify
import onnx
from const import TMP_DIR, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
from data.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
SynthesizerTrnMs256NSFsid_ONNX,
@@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
modelFile = modelSlot.modelFile
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
output_file_simple = (
os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
)
output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
output_path = os.path.join(TMP_DIR, output_file)
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
metadata = {
@@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
if gpuMomory > 0:
_export2onnx(modelFile, output_path, output_path_simple, True, metadata)
else:
print(
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
)
print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
_export2onnx(modelFile, output_path, output_path_simple, False, metadata)
return output_file_simple
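The onnxsim import at the top of this file implies a simplification pass inside _export2onnx, which the diff does not show; the standard onnxsim sequence would be roughly:

# Assumption: _export2onnx does something like this after exporting output_path.
model = onnx.load(output_path)
model_simplified, check_ok = simplify(model)
if not check_ok:
    raise RuntimeError("simplified ONNX model failed validation")
onnx.save(model_simplified, output_path_simple)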

View File

@@ -1,8 +1,8 @@
import os
import traceback
import faiss
from data.ModelSlot import RVCModelSlot
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
@@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
def createPipeline(modelSlot: RVCModelSlot, gpu: int, f0Detector: str):
dev = DeviceManager.get_instance().getDevice(gpu)
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
# Create the Inferencer
try:
inferencer = InferencerManager.getInferencer(
modelSlot.modelType, modelSlot.modelFile, gpu
)
inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelSlot.modelFile, gpu)
except Exception as e:
print("[Voice Changer] exception! loading inferencer", e)
traceback.print_exc()
@@ -54,7 +52,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
return pipeline
def _loadIndex(modelSlot: ModelSlot):
def _loadIndex(modelSlot: RVCModelSlot):
# Load the index
print("[Voice Changer] Loading index...")
# None if no file is specified
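The hunk is truncated here; given the faiss import at the top of the file, the load that presumably follows is the standard call (a sketch, since the body is cut off):

def _loadIndex_sketch(modelSlot: RVCModelSlot):
    # Hypothetical continuation: return None when no index file is set or present.
    if not modelSlot.indexFile or not os.path.exists(modelSlot.indexFile):
        return None
    return faiss.read_index(modelSlot.indexFile)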