mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00
WIP:common sample
This commit is contained in:
parent
24b55116f2
commit
435699d387
@ -107,9 +107,9 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
|
||||
], [
|
||||
("TokinaShigure_o", {"useIndex": True}),
|
||||
("KikotoMahiro_o", {"useIndex": False}),
|
||||
("Amitaro_o", {"useIndex": False}),
|
||||
# ("TokinaShigure_o", {"useIndex": True}),
|
||||
# ("KikotoMahiro_o", {"useIndex": False}),
|
||||
# ("Amitaro_o", {"useIndex": False}),
|
||||
("Tsukuyomi-chan_o", {"useIndex": False}),
|
||||
]
|
||||
elif mode == "testOfficial":
|
||||
|
@ -1,28 +0,0 @@
|
||||
from const import EnumInferenceTypes, EnumEmbedderTypes
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class ModelSlot:
    """Per-slot settings for an RVC model, serialized via ``dataclasses.asdict``."""

    # Files and user-adjustable defaults.
    modelFile: str = ""
    indexFile: str = ""
    defaultTune: int = 0
    # Annotated as float because callers assign fractional values (e.g. 0.0);
    # the default literal is kept as-is so serialized output does not change.
    defaultIndexRatio: float = 1
    defaultProtect: float = 0.5
    isONNX: bool = False

    # Model characteristics; the slot generator overwrites these after
    # inspecting the model file.
    modelType: str = EnumInferenceTypes.pyTorchRVC.value
    samplingRate: int = -1
    f0: bool = True
    embChannels: int = 256
    embOutputLayer: int = 9
    useFinalProj: bool = True
    deprecated: bool = False
    embedder: str = EnumEmbedderTypes.hubert.value

    # Descriptive metadata, copied from the sample definition when the slot
    # was created from a downloadable sample.
    name: str = ""
    description: str = ""
    credit: str = ""
    termsOfUseUrl: str = ""
    sampleId: str = ""
    iconFile: str = ""
|
@ -1,10 +1,11 @@
|
||||
from const import EnumEmbedderTypes, EnumInferenceTypes
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
|
||||
import torch
|
||||
import onnxruntime
|
||||
import json
|
||||
|
||||
from data.ModelSlot import ModelSlot
|
||||
|
||||
|
||||
def _setInfoByPytorch(slot: ModelSlot):
|
||||
cpt = torch.load(slot.modelFile, map_location="cpu")
|
||||
@ -15,22 +16,14 @@ def _setInfoByPytorch(slot: ModelSlot):
|
||||
slot.f0 = True if cpt["f0"] == 1 else False
|
||||
version = cpt.get("version", "v1")
|
||||
if version is None or version == "v1":
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.pyTorchRVC.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.pyTorchRVCNono.value
|
||||
)
|
||||
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
|
||||
slot.embChannels = 256
|
||||
slot.embOutputLayer = 9
|
||||
slot.useFinalProj = True
|
||||
slot.embedder = EnumEmbedderTypes.hubert.value
|
||||
print("[Voice Changer] Official Model(pyTorch) : v1")
|
||||
else:
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.pyTorchRVCv2.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.pyTorchRVCv2Nono.value
|
||||
)
|
||||
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
|
||||
slot.embChannels = 768
|
||||
slot.embOutputLayer = 12
|
||||
slot.useFinalProj = False
|
||||
@ -40,37 +33,21 @@ def _setInfoByPytorch(slot: ModelSlot):
|
||||
else:
|
||||
# DDPN RVC
|
||||
slot.f0 = True if cpt["f0"] == 1 else False
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.pyTorchWebUI.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.pyTorchWebUINono.value
|
||||
)
|
||||
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
|
||||
slot.embChannels = cpt["config"][17]
|
||||
slot.embOutputLayer = (
|
||||
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
|
||||
)
|
||||
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
|
||||
if slot.embChannels == 256:
|
||||
slot.useFinalProj = True
|
||||
else:
|
||||
slot.useFinalProj = False
|
||||
|
||||
# DDPNモデルの情報を表示
|
||||
if (
|
||||
slot.embChannels == 256
|
||||
and slot.embOutputLayer == 9
|
||||
and slot.useFinalProj is True
|
||||
):
|
||||
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
|
||||
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
|
||||
elif (
|
||||
slot.embChannels == 768
|
||||
and slot.embOutputLayer == 12
|
||||
and slot.useFinalProj is False
|
||||
):
|
||||
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
|
||||
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
|
||||
else:
|
||||
print(
|
||||
f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
|
||||
)
|
||||
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
|
||||
|
||||
slot.embedder = cpt["embedder_name"]
|
||||
if slot.embedder.endswith("768"):
|
||||
@ -91,9 +68,7 @@ def _setInfoByPytorch(slot: ModelSlot):
|
||||
|
||||
|
||||
def _setInfoByONNX(slot: ModelSlot):
|
||||
tmp_onnx_session = onnxruntime.InferenceSession(
|
||||
slot.modelFile, providers=["CPUExecutionProvider"]
|
||||
)
|
||||
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
|
||||
modelmeta = tmp_onnx_session.get_modelmeta()
|
||||
try:
|
||||
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
|
||||
@ -101,16 +76,8 @@ def _setInfoByONNX(slot: ModelSlot):
|
||||
# slot.modelType = metadata["modelType"]
|
||||
slot.embChannels = metadata["embChannels"]
|
||||
|
||||
slot.embOutputLayer = (
|
||||
metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
|
||||
)
|
||||
slot.useFinalProj = (
|
||||
metadata["useFinalProj"]
|
||||
if "useFinalProj" in metadata
|
||||
else True
|
||||
if slot.embChannels == 256
|
||||
else False
|
||||
)
|
||||
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
|
||||
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
|
||||
|
||||
if slot.embChannels == 256:
|
||||
slot.useFinalProj = True
|
||||
@ -118,22 +85,12 @@ def _setInfoByONNX(slot: ModelSlot):
|
||||
slot.useFinalProj = False
|
||||
|
||||
# ONNXモデルの情報を表示
|
||||
if (
|
||||
slot.embChannels == 256
|
||||
and slot.embOutputLayer == 9
|
||||
and slot.useFinalProj is True
|
||||
):
|
||||
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
|
||||
print("[Voice Changer] ONNX Model: Official v1 like")
|
||||
elif (
|
||||
slot.embChannels == 768
|
||||
and slot.embOutputLayer == 12
|
||||
and slot.useFinalProj is False
|
||||
):
|
||||
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
|
||||
print("[Voice Changer] ONNX Model: Official v2 like")
|
||||
else:
|
||||
print(
|
||||
f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
|
||||
)
|
||||
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
|
||||
|
||||
if "embedder" not in metadata:
|
||||
slot.embedder = EnumEmbedderTypes.hubert.value
|
||||
@ -149,11 +106,7 @@ def _setInfoByONNX(slot: ModelSlot):
|
||||
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
|
||||
|
||||
slot.f0 = metadata["f0"]
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.onnxRVC.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.onnxRVCNono.value
|
||||
)
|
||||
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
|
||||
slot.samplingRate = metadata["samplingRate"]
|
||||
slot.deprecated = False
|
||||
|
||||
|
@ -5,10 +5,8 @@ from typing import cast
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchaudio
|
||||
from data.ModelSlot import loadAllSlotInfo
|
||||
from data.ModelSlot import RVCModelSlot, loadAllSlotInfo
|
||||
from utils.downloader.SampleDownloader import getSampleInfos
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.SampleDownloader import downloadModelFiles
|
||||
|
||||
|
||||
# avoiding parse arg error in RVC
|
||||
@ -102,41 +100,16 @@ class RVC:
|
||||
def loadModel(self, props: LoadModelParams):
|
||||
target_slot_idx = props.slot
|
||||
params = props.params
|
||||
slotInfo: ModelSlot = ModelSlot()
|
||||
slotInfo: RVCModelSlot = RVCModelSlot()
|
||||
|
||||
print("loadModel", params)
|
||||
# サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
|
||||
if len(params["sampleId"]) > 0:
|
||||
sampleId = params["sampleId"]
|
||||
sampleInfo = self.getSampleInfo(sampleId)
|
||||
useIndex = params["rvcIndexDownload"]
|
||||
|
||||
if sampleInfo is None:
|
||||
print("[Voice Changer] sampleInfo is None")
|
||||
return
|
||||
modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
|
||||
slotInfo.modelFile = modelPath
|
||||
if indexPath is not None:
|
||||
slotInfo.indexFile = indexPath
|
||||
if iconPath is not None:
|
||||
slotInfo.iconFile = iconPath
|
||||
|
||||
slotInfo.sampleId = sampleInfo.id
|
||||
slotInfo.credit = sampleInfo.credit
|
||||
slotInfo.description = sampleInfo.description
|
||||
slotInfo.name = sampleInfo.name
|
||||
slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
|
||||
|
||||
# slotInfo.samplingRate = sampleInfo.sampleRate
|
||||
# slotInfo.modelType = sampleInfo.modelType
|
||||
# slotInfo.f0 = sampleInfo.f0
|
||||
else:
|
||||
slotInfo.modelFile = params["files"]["rvcModel"]
|
||||
slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
|
||||
slotInfo.modelFile = params["files"]["rvcModel"]
|
||||
slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
|
||||
|
||||
slotInfo.defaultTune = params["defaultTune"]
|
||||
slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
|
||||
slotInfo.defaultProtect = params["defaultProtect"]
|
||||
slotInfo.voiceChangerType = "RVC"
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
|
||||
if slotInfo.isONNX:
|
||||
|
@ -1,164 +0,0 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import asdict
|
||||
import os
|
||||
from const import RVC_MODEL_DIRNAME, TMP_DIR
|
||||
from Downloader import download, download_no_tqdm
|
||||
from ModelSample import RVCModelSample, getModelSamples
|
||||
import json
|
||||
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
||||
|
||||
|
||||
def checkRvcModelExist(model_dir: str):
    """Return whether the ``RVC_MODEL_DIRNAME`` entry exists under ``model_dir``."""
    return os.path.exists(os.path.join(model_dir, RVC_MODEL_DIRNAME))
|
||||
|
||||
|
||||
def _queueSlotDownload(downloadParams: list, url: str, slotDir: str, position: int) -> str:
    """Append a download job for ``url`` and return the local path it will be saved to."""
    saveTo = os.path.join(slotDir, os.path.basename(url))
    downloadParams.append({"url": url, "saveTo": saveTo, "position": position})
    return saveTo


def _writeSlotParams(slotDir: str, slotInfo) -> None:
    """Serialize ``slotInfo`` (a ModelSlot) to ``params.json`` inside ``slotDir``."""
    # Build the dict first so a serialization failure cannot truncate an existing file.
    payload = asdict(slotInfo)
    with open(os.path.join(slotDir, "params.json"), "w") as f:
        json.dump(payload, f)


def downloadInitialSampleModels(sampleJsons: list[str], sampleModelIds: list[str], model_dir: str):
    """Download the initial sample models into numbered slot directories.

    For every entry of ``sampleModelIds`` that matches a sample returned by
    ``getModelSamples(sampleJsons, "RVC")``, a slot directory
    ``<model_dir>/<RVC_MODEL_DIRNAME>/<n>`` is created, the model (and
    optionally index and icon) downloads are queued, and a preliminary
    ``params.json`` is written. After all downloads have run, each
    ``params.json`` is reloaded, completed from the downloaded model file and
    written back.

    Args:
        sampleJsons: passed straight to ``getModelSamples``.
        sampleModelIds: entries are indexed as ``entry[0]`` (sample id) and
            ``entry[1]`` (index download flag).
            NOTE(review): the ``list[str]`` annotation does not match this
            usage; entries look like pairs -- confirm against callers.
        model_dir: root directory under which slot directories are created.

    Returns:
        None. Returns early, doing nothing, when no samples are available.
    """
    sampleModels = getModelSamples(sampleJsons, "RVC")
    if sampleModels is None:
        return

    downloadParams = []
    slot_count = 0
    line_num = 0  # running "position" value attached to each queued download
    for initSampleId in sampleModelIds:
        # Search for the requested initial sample.
        sample = next((s for s in sampleModels if s.id == initSampleId[0]), None)
        if sample is None:
            print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
            continue

        # Found: prepare the slot directory and queue its downloads.
        slotInfo: ModelSlot = ModelSlot()
        slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
        os.makedirs(slotDir, exist_ok=True)

        slotInfo.modelFile = _queueSlotDownload(downloadParams, sample.modelUrl, slotDir, line_num)
        line_num += 1

        # The index is fetched only when the flag is exactly True.
        if initSampleId[1] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
            slotInfo.indexFile = _queueSlotDownload(downloadParams, sample.indexUrl, slotDir, line_num)
            line_num += 1

        if hasattr(sample, "icon") and sample.icon != "":
            slotInfo.iconFile = _queueSlotDownload(downloadParams, sample.icon, slotDir, line_num)
            line_num += 1

        slotInfo.sampleId = sample.id
        slotInfo.credit = sample.credit
        slotInfo.description = sample.description
        slotInfo.name = sample.name
        slotInfo.termsOfUseUrl = sample.termsOfUseUrl
        slotInfo.defaultTune = 0
        slotInfo.defaultIndexRatio = 0.0
        slotInfo.defaultProtect = 0.5
        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")

        # The files have not been downloaded yet at this point, so the
        # model-derived fields are filled in by the second pass below.
        _writeSlotParams(slotDir, slotInfo)
        slot_count += 1

    # Download.
    print("[Voice Changer] Downloading model files...")
    with ThreadPoolExecutor() as pool:
        # The result iterator is not consumed, so an exception raised inside
        # ``download`` is not re-raised here.
        pool.map(download, downloadParams)

    # Create metadata from the downloaded model files.
    print("[Voice Changer] Generating metadata...")
    for slotId in range(slot_count):
        slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
        with open(os.path.join(slotDir, "params.json")) as f:
            jsonDict = json.load(f)
        slotInfo = ModelSlot(**jsonDict)
        if slotInfo.isONNX:
            _setInfoByONNX(slotInfo)
        else:
            _setInfoByPytorch(slotInfo)
        _writeSlotParams(slotDir, slotInfo)
|
||||
|
||||
|
||||
def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
    """Download a sample's model, and optionally its index and icon, into ``TMP_DIR``.

    Args:
        sampleInfo: sample description providing ``modelUrl`` and, optionally,
            ``indexUrl`` and ``icon``.
        useIndex: the index is downloaded only when this is exactly ``True``
            and the sample has a non-empty ``indexUrl``.

    Returns:
        ``(modelPath, indexPath, iconPath)``; ``indexPath`` and ``iconPath``
        are ``None`` when the corresponding file was not requested.
    """

    def _tmpPath(url):
        # Local destination: TMP_DIR plus the last path component of the URL.
        return os.path.join(TMP_DIR, os.path.basename(url))

    modelPath = _tmpPath(sampleInfo.modelUrl)
    jobs = [{"url": sampleInfo.modelUrl, "saveTo": modelPath, "position": 0}]

    indexPath = None
    if useIndex is True and getattr(sampleInfo, "indexUrl", "") != "":
        print("[Voice Changer] Download sample with index.")
        indexPath = _tmpPath(sampleInfo.indexUrl)
        jobs.append({"url": sampleInfo.indexUrl, "saveTo": indexPath, "position": 1})

    iconPath = None
    if getattr(sampleInfo, "icon", "") != "":
        iconPath = _tmpPath(sampleInfo.icon)
        # The icon keeps position 2 even when no index job was queued.
        jobs.append({"url": sampleInfo.icon, "saveTo": iconPath, "position": 2})

    print("[Voice Changer] Downloading model files...", end="")
    with ThreadPoolExecutor() as pool:
        pool.map(download_no_tqdm, jobs)
    print("")

    return modelPath, indexPath, iconPath
|
@ -4,7 +4,7 @@ import torch
|
||||
from onnxsim import simplify
|
||||
import onnx
|
||||
from const import TMP_DIR, EnumInferenceTypes
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from data.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
|
||||
SynthesizerTrnMs256NSFsid_ONNX,
|
||||
@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
|
||||
modelFile = modelSlot.modelFile
|
||||
|
||||
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
|
||||
output_file_simple = (
|
||||
os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
|
||||
)
|
||||
output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
|
||||
output_path = os.path.join(TMP_DIR, output_file)
|
||||
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
|
||||
metadata = {
|
||||
@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
|
||||
if gpuMomory > 0:
|
||||
_export2onnx(modelFile, output_path, output_path_simple, True, metadata)
|
||||
else:
|
||||
print(
|
||||
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
|
||||
)
|
||||
print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
|
||||
_export2onnx(modelFile, output_path, output_path_simple, False, metadata)
|
||||
return output_file_simple
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
import os
|
||||
import traceback
|
||||
import faiss
|
||||
from data.ModelSlot import RVCModelSlot
|
||||
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
|
||||
@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
||||
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
|
||||
|
||||
|
||||
def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
||||
def createPipeline(modelSlot: RVCModelSlot, gpu: int, f0Detector: str):
|
||||
dev = DeviceManager.get_instance().getDevice(gpu)
|
||||
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
|
||||
|
||||
# Inferencer 生成
|
||||
try:
|
||||
inferencer = InferencerManager.getInferencer(
|
||||
modelSlot.modelType, modelSlot.modelFile, gpu
|
||||
)
|
||||
inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelSlot.modelFile, gpu)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] exception! loading inferencer", e)
|
||||
traceback.print_exc()
|
||||
@ -54,7 +52,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
||||
return pipeline
|
||||
|
||||
|
||||
def _loadIndex(modelSlot: ModelSlot):
|
||||
def _loadIndex(modelSlot: RVCModelSlot):
|
||||
# Indexのロード
|
||||
print("[Voice Changer] Loading index...")
|
||||
# ファイル指定がない場合はNone
|
||||
|
Loading…
Reference in New Issue
Block a user