wataru 2023-05-03 17:12:40 +09:00
parent 48846aad7f
commit 7f1cdb9ddc
7 changed files with 93 additions and 113 deletions

View File

@@ -63,7 +63,7 @@ class MMVC_Rest_Fileuploader:
def post_update_settings(
self, key: str = Form(...), val: Union[int, str, float] = Form(...)
):
print("post_update_settings", key, val)
print("[Voice Changer] update configuration:", key, val)
info = self.voiceChangerManager.update_settings(key, val)
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
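
For context, the endpoint above receives its arguments as form fields and returns the updated settings as JSON. A minimal client-side sketch (host, port, and route are assumptions, not taken from this diff):

import requests

# hypothetical client call; adjust URL/route to wherever the REST API is mounted
res = requests.post(
    "http://localhost:18888/update_settings",
    data={"key": "modelSlotIndex", "val": 0},  # sent as form fields, matching Form(...)
)
print(res.json())  # the jsonable-encoded settings info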

View File

@@ -70,7 +70,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str):
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
slot.modelType = metadata["modelType"]
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
if "embedder" not in metadata:

View File

@@ -1,5 +1,6 @@
import sys
import os
from Exceptions import NoModeLoadedException
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
@@ -42,7 +43,7 @@ import torch
import traceback
import faiss
from const import TMP_DIR, UPLOAD_DIR, EnumEmbedderTypes
from const import UPLOAD_DIR, EnumEmbedderTypes
from voice_changer.RVC.custom_vc_infer_pipeline import VC
@@ -89,9 +90,6 @@ class RVC:
f"[Voice Changer] RVC new model is uploaded,{target_slot_idx}",
asdict(modelSlot),
)
"""
[Voice Changer] RVC new model is uploaded,0 {'pyTorchModelFile': 'upload_dir/0/kurage.pth', 'onnxModelFile': None, 'featureFile': None, 'indexFile': None, 'defaultTrans': 16, 'isONNX': False, 'modelType': <EnumInferenceTypes.pyTorchWebUI: 'pyTorchWebUI'>, 'samplingRate': 48000, 'f0': True, 'embChannels': 768, 'deprecated': False, 'embedder': 'hubert-base-japanese'}
"""
# load only on the first run
if self.initialLoad:
@@ -139,7 +137,6 @@ class RVC:
# create the Embedder
try:
print("AFASFDAFDAFDASDFASDFSADFASDFA", half, self.settings.gpu)
embedder = EmbedderManager.getEmbedder(
modelSlot.embedder,
emmbedderFilename,
@@ -179,8 +176,14 @@
def prepareModel(self, slot: int):
if slot < 0:
return self.get_info()
print("[Voice Changer] Prepare Model of slot:", slot)
modelSlot = self.settings.modelSlots[slot]
inferencerFilename = (
modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile
)
if inferencerFilename == "":
return self.get_info()
print("[Voice Changer] Prepare Model of slot:", slot)
# load the inferencer and embedder
inferencer, embedder = self.createPipeline(modelSlot)
@@ -240,13 +243,13 @@
and self.embedder.isHalf == half
):
print(
"NOT NEED CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!",
"USE EXSISTING PIPELINE",
half,
)
self.embedder.setDevice(dev)
self.inferencer.setDevice(dev)
else:
print("CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", half)
print("CHAGE TO NEW PIPELINE", half)
self.prepareModel(self.settings.modelSlotIndex)
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
@@ -307,7 +310,9 @@ class RVC:
)
raise NoModeLoadedException("model_common")
if self.needSwitch:
print(f"Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}")
print(
f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}"
)
self.currentSlot = self.settings.modelSlotIndex
self.switchModel()
self.needSwitch = False
@@ -363,8 +368,8 @@
return result
def __del__(self):
del self.net_g
del self.onnx_session
del self.inferencer
del self.embedder
print("---------- REMOVING ---------------")
@@ -383,57 +388,17 @@ class RVC:
pass
def export2onnx(self):
if hasattr(self, "net_g") is False or self.net_g is None:
print("[Voice Changer] export2onnx, No pyTorch session.")
return {"status": "ng", "path": ""}
modelSlot = self.settings.modelSlots[self.settings.modelSlotIndex]
pyTorchModelFile = modelSlot.pyTorchModelFile
pyTorchModelFile = self.settings.modelSlots[
self.settings.modelSlotIndex
].pyTorchModelFile  # use slot, not currentSlot, so export is possible before inference
if pyTorchModelFile is None:
# return an error if the PyTorch file does not exist
if pyTorchModelFile is None or pyTorchModelFile == "":
print("[Voice Changer] export2onnx, No pyTorch filepath.")
return {"status": "ng", "path": ""}
import voice_changer.RVC.export2onnx as onnxExporter
output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
output_file_simple = (
os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
)
output_path = os.path.join(TMP_DIR, output_file)
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
print(
"embChannels",
self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
)
metadata = {
"application": "VC_CLIENT",
"version": "1",
"modelType": self.settings.modelSlots[
self.settings.modelSlotIndex
].modelType,
"samplingRate": self.settings.modelSlots[
self.settings.modelSlotIndex
].samplingRate,
"f0": self.settings.modelSlots[self.settings.modelSlotIndex].f0,
"embChannels": self.settings.modelSlots[
self.settings.modelSlotIndex
].embChannels,
"embedder": self.settings.modelSlots[self.settings.modelSlotIndex].embedder,
}
if torch.cuda.device_count() > 0:
onnxExporter.export2onnx(
pyTorchModelFile, output_path, output_path_simple, True, metadata
)
else:
print(
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
)
onnxExporter.export2onnx(
pyTorchModelFile, output_path, output_path_simple, False, metadata
)
output_file_simple = onnxExporter.export2onnx(modelSlot)
return {
"status": "ok",
"path": f"/tmp/{output_file_simple}",

View File

@@ -55,7 +55,7 @@ class VC(object):
pitchf, device=self.device, dtype=torch.float
).unsqueeze(0)
# tensor
# adjust tensor type
feats = torch.from_numpy(audio_pad)
if self.is_half is True:
feats = feats.half()
@@ -94,11 +94,9 @@
torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
+ (1 - index_rate) * feats
)
#
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
# pitch extraction
# adjust pitch size
p_len = audio_pad.shape[0] // self.window
if feats.shape[1] < p_len:
p_len = feats.shape[1]
@@ -108,47 +106,23 @@
p_len = torch.tensor([p_len], device=self.device).long()
# run inference
try:
with torch.no_grad():
audio1 = (
(inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
(
inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
* 32768
)
.data.cpu()
.float()
.numpy()
.astype(np.int16)
)
# if pitch is not None:
# print("INFERENCE 1 ")
# audio1 = (
# (
# inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
# * 32768
# )
# .data.cpu()
# .float()
# .numpy()
# .astype(np.int16)
# )
# else:
# if hasattr(inferencer, "infer_pitchless"):
# print("INFERENCE 2 ")
# audio1 = (
# (inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768)
# .data.cpu()
# .float()
# .numpy()
# .astype(np.int16)
# )
# else:
# print("INFERENCE 3 ")
# audio1 = (
# (inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768)
# .data.cpu()
# .float()
# .numpy()
# .astype(np.int16)
# )
except RuntimeError as e:
if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException()
else:
raise e
del feats, p_len, padding_mask
torch.cuda.empty_cache()
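
The inference block above scales the model's float output by 32768 and casts to 16-bit PCM. The same conversion as a standalone helper, with explicit clipping added for safety (the pipeline itself relies on the waveform staying within [-1, 1]):

import numpy as np
import torch

def to_int16_pcm(audio: torch.Tensor) -> np.ndarray:
    # float waveform in [-1, 1] -> int16 PCM samples
    wav = audio.data.cpu().float().numpy()
    return np.clip(wav * 32768.0, -32768, 32767).astype(np.int16)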

View File

@@ -1,7 +1,10 @@
import os
import json
import torch
from onnxsim import simplify
import onnx
from const import TMP_DIR, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.onnx.SynthesizerTrnMs256NSFsid_ONNX import (
SynthesizerTrnMs256NSFsid_ONNX,
@@ -15,24 +18,60 @@ from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsidNono_webui_ONNX import (
from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsid_webui_ONNX import (
SynthesizerTrnMsNSFsid_webui_ONNX,
)
from .const import RVC_MODEL_TYPE_RVC, RVC_MODEL_TYPE_WEBUI
def export2onnx(input_model, output_model, output_model_simple, is_half, metadata):
def export2onnx(modelSlot: ModelSlot):
pyTorchModelFile = modelSlot.pyTorchModelFile
output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
output_file_simple = (
os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
)
output_path = os.path.join(TMP_DIR, output_file)
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
metadata = {
"application": "VC_CLIENT",
"version": "2",
# stored as text below, since EnumInferenceTypes itself cannot be serialized
"modelType": modelSlot.modelType.value,
"samplingRate": modelSlot.samplingRate,
"f0": modelSlot.f0,
"embChannels": modelSlot.embChannels,
"embedder": modelSlot.embedder.value,
}
if torch.cuda.device_count() > 0:
_export2onnx(pyTorchModelFile, output_path, output_path_simple, True, metadata)
else:
print(
"[Voice Changer] Warning: exporting ONNX in float32; the file size may roughly double."
)
_export2onnx(pyTorchModelFile, output_path, output_path_simple, False, metadata)
return output_file_simple
def _export2onnx(input_model, output_model, output_model_simple, is_half, metadata):
cpt = torch.load(input_model, map_location="cpu")
if is_half:
dev = torch.device("cuda", index=0)
else:
dev = torch.device("cpu")
if metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_RVC:
# compare as text, since EnumInferenceTypes cannot be serialized as-is
if metadata["modelType"] == EnumInferenceTypes.pyTorchRVC.value:
net_g_onnx = SynthesizerTrnMs256NSFsid_ONNX(*cpt["config"], is_half=is_half)
elif metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI:
elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUI.value:
net_g_onnx = SynthesizerTrnMsNSFsid_webui_ONNX(**cpt["params"], is_half=is_half)
elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_RVC:
elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCNono.value:
net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"])
elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI:
elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUINono.value:
net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"])
else:
print(
"unknwon::::: ",
metadata["modelType"],
EnumInferenceTypes.pyTorchWebUI.value,
)
net_g_onnx.eval().to(dev)
net_g_onnx.load_state_dict(cpt["weight"], strict=False)
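
The hunk is cut off before the actual export call. For orientation, a minimal assumed sketch of the usual tail of such a function: torch.onnx.export, attaching the metadata dict as an ONNX metadata property, then simplifying with onnxsim (the dummy inputs and opset version are placeholders, not from the commit):

import json
import onnx
import torch
from onnxsim import simplify

def _finish_export(net_g_onnx, dummy_inputs, output_model, output_model_simple, metadata):
    torch.onnx.export(net_g_onnx, dummy_inputs, output_model, opset_version=17)
    model = onnx.load(output_model)
    prop = model.metadata_props.add()  # StringStringEntryProto key/value pair
    prop.key, prop.value = "metadata", json.dumps(metadata)
    onnx.save(model, output_model)
    simplified, ok = simplify(model)  # onnxsim returns (model, check)
    if ok:
        onnx.save(simplified, output_model_simple)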

View File

@@ -24,6 +24,7 @@ class OnnxRVCInferencer(Inferencer):
self.isHalf = True
self.model = onnx_session
self.setDevice(dev)
return self
def infer(
@@ -37,9 +38,9 @@ class OnnxRVCInferencer(Inferencer):
if pitch is None or pitchf is None:
raise RuntimeError("[Voice Changer] Pitch or Pitchf is not found.")
print("INFER1", self.model.get_providers())
print("INFER2", self.model.get_provider_options())
print("INFER3", self.model.get_session_options())
# print("INFER1", self.model.get_providers())
# print("INFER2", self.model.get_provider_options())
# print("INFER3", self.model.get_session_options())
if self.isHalf:
audio1 = self.model.run(
["audio"],

View File

@@ -11,6 +11,7 @@ from infer_pack.models import ( # type:ignore
class RVCInferencer(Inferencer):
def loadModel(self, file: str, dev: device, isHalf: bool = True):
super().setProps(EnumInferenceTypes.pyTorchRVC, file, dev, isHalf)
print("load inf", file)
cpt = torch.load(file, map_location="cpu")
model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)
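
This hunk also ends mid-function. An assumed sketch of the usual continuation, mirroring the non-strict state_dict load visible in the exporter above (not part of the commit):

# assumed continuation of loadModel
model.eval()
model.load_state_dict(cpt["weight"], strict=False)
self.model = model.half().to(dev) if isHalf else model.float().to(dev)
return self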