Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-01-23 13:35:12 +03:00)

Commit 7f1cdb9ddc (parent 48846aad7f): update
@@ -63,7 +63,7 @@ class MMVC_Rest_Fileuploader:
     def post_update_settings(
         self, key: str = Form(...), val: Union[int, str, float] = Form(...)
     ):
-        print("post_update_settings", key, val)
+        print("[Voice Changer] update configuration:", key, val)
         info = self.voiceChangerManager.update_settings(key, val)
         json_compatible_item_data = jsonable_encoder(info)
         return JSONResponse(content=json_compatible_item_data)
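
For context, the endpoint touched above follows the standard FastAPI form-data pattern. A minimal self-contained sketch; `update_settings` here is a hypothetical stand-in for `voiceChangerManager.update_settings`, and the route path is assumed:

from typing import Union

from fastapi import FastAPI, Form
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

app = FastAPI()

def update_settings(key: str, val: Union[int, str, float]) -> dict:
    # Stand-in for VoiceChangerManager.update_settings: echo the new value.
    return {"key": key, "val": val}

@app.post("/update_settings")
def post_update_settings(
    key: str = Form(...), val: Union[int, str, float] = Form(...)
):
    print("[Voice Changer] update configuration:", key, val)
    info = update_settings(key, val)
    return JSONResponse(content=jsonable_encoder(info))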
@@ -70,7 +70,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str):
     try:
         metadata = json.loads(modelmeta.custom_metadata_map["metadata"])

-        slot.modelType = metadata["modelType"]
+        # slot.modelType = metadata["modelType"]
         slot.embChannels = metadata["embChannels"]

         if "embedder" not in metadata:
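
The metadata being parsed here is the JSON blob that export2onnx (later in this diff) embeds into the ONNX file. A sketch of reading it back with onnxruntime; "model.onnx" is a placeholder path:

import json

import onnxruntime

session = onnxruntime.InferenceSession(
    "model.onnx", providers=["CPUExecutionProvider"]
)
modelmeta = session.get_modelmeta()
# custom_metadata_map is a plain str -> str dict; the "metadata" key holds JSON.
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
print(metadata["modelType"], metadata["embChannels"], metadata.get("embedder"))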
@@ -1,5 +1,6 @@
 import sys
 import os
 from Exceptions import NoModeLoadedException
+from voice_changer.RVC.ModelSlot import ModelSlot
 from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager

@@ -42,7 +43,7 @@ import torch
 import traceback
 import faiss

-from const import TMP_DIR, UPLOAD_DIR, EnumEmbedderTypes
+from const import UPLOAD_DIR, EnumEmbedderTypes


 from voice_changer.RVC.custom_vc_infer_pipeline import VC
@@ -89,9 +90,6 @@ class RVC:
             f"[Voice Changer] RVC new model is uploaded,{target_slot_idx}",
             asdict(modelSlot),
         )
-        """
-        [Voice Changer] RVC new model is uploaded,0 {'pyTorchModelFile': 'upload_dir/0/kurage.pth', 'onnxModelFile': None, 'featureFile': None, 'indexFile': None, 'defaultTrans': 16, 'isONNX': False, 'modelType': <EnumInferenceTypes.pyTorchWebUI: 'pyTorchWebUI'>, 'samplingRate': 48000, 'f0': True, 'embChannels': 768, 'deprecated': False, 'embedder': 'hubert-base-japanese'}
-        """

         # 初回のみロード (load only on the first run)
         if self.initialLoad:
@@ -139,7 +137,6 @@ class RVC:

         # Embedder 生成 (create the embedder)
         try:
-            print("AFASFDAFDAFDASDFASDFSADFASDFA", half, self.settings.gpu)
             embedder = EmbedderManager.getEmbedder(
                 modelSlot.embedder,
                 emmbedderFilename,
@@ -179,8 +176,14 @@ class RVC:
     def prepareModel(self, slot: int):
+        if slot < 0:
+            return self.get_info()
-        print("[Voice Changer] Prepare Model of slot:", slot)
         modelSlot = self.settings.modelSlots[slot]
+        inferencerFilename = (
+            modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile
+        )
+        if inferencerFilename == "":
+            return self.get_info()
+
+        print("[Voice Changer] Prepare Model of slot:", slot)

         # Inferencer, embedderのロード (load the inferencer and embedder)
         inferencer, embedder = self.createPipeline(modelSlot)
@@ -240,13 +243,13 @@ class RVC:
                 and self.embedder.isHalf == half
             ):
                 print(
-                    "NOT NEED CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!",
+                    "USE EXSISTING PIPELINE",
                     half,
                 )
                 self.embedder.setDevice(dev)
                 self.inferencer.setDevice(dev)
             else:
-                print("CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", half)
+                print("CHAGE TO NEW PIPELINE", half)
                 self.prepareModel(self.settings.modelSlotIndex)
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
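
The branch above encodes a reuse-vs-rebuild decision: if the requested half-precision setting matches what is already loaded, the pipeline is just moved to the new device; otherwise it is rebuilt. A minimal sketch of that decision, with hypothetical Component stand-ins for the repo's embedder/inferencer classes:

import torch

class Component:
    def __init__(self, isHalf: bool):
        self.isHalf = isHalf
        self.device = torch.device("cpu")

    def setDevice(self, dev: torch.device):
        # Real code would move the underlying model weights.
        self.device = dev

def apply_device_setting(embedder, inferencer, dev, half, rebuild):
    if embedder.isHalf == half and inferencer.isHalf == half:
        # Same precision: reuse the existing pipeline on the new device.
        embedder.setDevice(dev)
        inferencer.setDevice(dev)
    else:
        # Precision changed: reload models for the current slot.
        rebuild()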
@@ -307,7 +310,9 @@ class RVC:
             )
             raise NoModeLoadedException("model_common")
         if self.needSwitch:
-            print(f"Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}")
+            print(
+                f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}"
+            )
             self.currentSlot = self.settings.modelSlotIndex
             self.switchModel()
             self.needSwitch = False
@@ -363,8 +368,8 @@ class RVC:
         return result

     def __del__(self):
-        del self.net_g
-        del self.onnx_session
+        del self.inferencer
+        del self.embedder

         print("---------- REMOVING ---------------")

@@ -383,57 +388,17 @@ class RVC:
         pass

     def export2onnx(self):
-        if hasattr(self, "net_g") is False or self.net_g is None:
-            print("[Voice Changer] export2onnx, No pyTorch session.")
-            return {"status": "ng", "path": ""}
+        modelSlot = self.settings.modelSlots[self.settings.modelSlotIndex]
+        pyTorchModelFile = modelSlot.pyTorchModelFile

-        pyTorchModelFile = self.settings.modelSlots[
-            self.settings.modelSlotIndex
-        ].pyTorchModelFile  # inference前にexportできるようにcurrentSlotではなくslot (use slot, not currentSlot, so export can run before inference)
-
-        if pyTorchModelFile is None:
+        # PyTorchのファイルが存在しない場合はエラーを返す (return an error when no PyTorch file exists)
+        if pyTorchModelFile is None or pyTorchModelFile == "":
             print("[Voice Changer] export2onnx, No pyTorch filepath.")
             return {"status": "ng", "path": ""}

         import voice_changer.RVC.export2onnx as onnxExporter

-        output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
-        output_file_simple = (
-            os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
-        )
-        output_path = os.path.join(TMP_DIR, output_file)
-        output_path_simple = os.path.join(TMP_DIR, output_file_simple)
-        print(
-            "embChannels",
-            self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
-        )
-        metadata = {
-            "application": "VC_CLIENT",
-            "version": "1",
-            "modelType": self.settings.modelSlots[
-                self.settings.modelSlotIndex
-            ].modelType,
-            "samplingRate": self.settings.modelSlots[
-                self.settings.modelSlotIndex
-            ].samplingRate,
-            "f0": self.settings.modelSlots[self.settings.modelSlotIndex].f0,
-            "embChannels": self.settings.modelSlots[
-                self.settings.modelSlotIndex
-            ].embChannels,
-            "embedder": self.settings.modelSlots[self.settings.modelSlotIndex].embedder,
-        }
-
-        if torch.cuda.device_count() > 0:
-            onnxExporter.export2onnx(
-                pyTorchModelFile, output_path, output_path_simple, True, metadata
-            )
-        else:
-            print(
-                "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
-            )
-            onnxExporter.export2onnx(
-                pyTorchModelFile, output_path, output_path_simple, False, metadata
-            )
-
+        output_file_simple = onnxExporter.export2onnx(modelSlot)
         return {
             "status": "ok",
             "path": f"/tmp/{output_file_simple}",
@@ -55,7 +55,7 @@ class VC(object):
             pitchf, device=self.device, dtype=torch.float
         ).unsqueeze(0)

-        # tensor
+        # tensor型調整 (tensor dtype adjustment)
         feats = torch.from_numpy(audio_pad)
         if self.is_half is True:
             feats = feats.half()
@@ -94,11 +94,9 @@ class VC(object):
                 torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
                 + (1 - index_rate) * feats
             )

-        #
         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)

-        # ピッチ抽出 (pitch extraction)
+        # ピッチサイズ調整 (pitch length adjustment)
         p_len = audio_pad.shape[0] // self.window
         if feats.shape[1] < p_len:
             p_len = feats.shape[1]
@@ -108,47 +106,23 @@ class VC(object):
             p_len = torch.tensor([p_len], device=self.device).long()

         # 推論実行 (run inference)
-        with torch.no_grad():
-            audio1 = (
-                (inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
-                .data.cpu()
-                .float()
-                .numpy()
-                .astype(np.int16)
-            )
-
-            # if pitch is not None:
-            #     print("INFERENCE 1 ")
-            #     audio1 = (
-            #         (
-            #             inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
-            #             * 32768
-            #         )
-            #         .data.cpu()
-            #         .float()
-            #         .numpy()
-            #         .astype(np.int16)
-            #     )
-            # else:
-            #     if hasattr(inferencer, "infer_pitchless"):
-            #         print("INFERENCE 2 ")
-
-            #         audio1 = (
-            #             (inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768)
-            #             .data.cpu()
-            #             .float()
-            #             .numpy()
-            #             .astype(np.int16)
-            #         )
-            #     else:
-            #         print("INFERENCE 3 ")
-            #         audio1 = (
-            #             (inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768)
-            #             .data.cpu()
-            #             .float()
-            #             .numpy()
-            #             .astype(np.int16)
-            #         )
+        try:
+            with torch.no_grad():
+                audio1 = (
+                    (
+                        inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
+                        * 32768
+                    )
+                    .data.cpu()
+                    .float()
+                    .numpy()
+                    .astype(np.int16)
+                )
+        except RuntimeError as e:
+            if "HALF" in e.__str__().upper():
+                raise HalfPrecisionChangingException()
+            else:
+                raise e

         del feats, p_len, padding_mask
         torch.cuda.empty_cache()
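
The try/except added above exists because some ops have no fp16 kernel on a given device, and PyTorch surfaces that as a RuntimeError whose message mentions "Half". Translating it into the repo's HalfPrecisionChangingException lets a caller retry in fp32. A minimal sketch of that pattern; run_fp16 and run_fp32 are hypothetical stand-ins for the real inference calls:

class HalfPrecisionChangingException(Exception):
    pass

def classify_half_errors(fn):
    # Mirrors the except clause in the diff: turn fp16-kernel failures into a
    # dedicated exception, re-raise everything else unchanged.
    def wrapped(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except RuntimeError as e:
            if "HALF" in str(e).upper():
                raise HalfPrecisionChangingException()
            raise
    return wrapped

def infer_with_precision_fallback(run_fp16, run_fp32):
    try:
        return classify_half_errors(run_fp16)()
    except HalfPrecisionChangingException:
        return run_fp32()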
@@ -1,7 +1,10 @@
+import os
 import json
 import torch
 from onnxsim import simplify
 import onnx
+from const import TMP_DIR, EnumInferenceTypes
+from voice_changer.RVC.ModelSlot import ModelSlot

 from voice_changer.RVC.onnx.SynthesizerTrnMs256NSFsid_ONNX import (
     SynthesizerTrnMs256NSFsid_ONNX,
@@ -15,24 +18,60 @@ from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsidNono_webui_ONNX import (
 from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsid_webui_ONNX import (
     SynthesizerTrnMsNSFsid_webui_ONNX,
 )
-from .const import RVC_MODEL_TYPE_RVC, RVC_MODEL_TYPE_WEBUI


-def export2onnx(input_model, output_model, output_model_simple, is_half, metadata):
+def export2onnx(modelSlot: ModelSlot):
+    pyTorchModelFile = modelSlot.pyTorchModelFile
+
+    output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
+    output_file_simple = (
+        os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
+    )
+    output_path = os.path.join(TMP_DIR, output_file)
+    output_path_simple = os.path.join(TMP_DIR, output_file_simple)
+    metadata = {
+        "application": "VC_CLIENT",
+        "version": "2",
+        # ↓EnumInferenceTypesのままだとシリアライズできないのでテキスト化 (EnumInferenceTypes can't be serialized as-is, so store it as text)
+        "modelType": modelSlot.modelType.value,
+        "samplingRate": modelSlot.samplingRate,
+        "f0": modelSlot.f0,
+        "embChannels": modelSlot.embChannels,
+        "embedder": modelSlot.embedder.value,
+    }
+
+    if torch.cuda.device_count() > 0:
+        _export2onnx(pyTorchModelFile, output_path, output_path_simple, True, metadata)
+    else:
+        print(
+            "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
+        )
+        _export2onnx(pyTorchModelFile, output_path, output_path_simple, False, metadata)
+    return output_file_simple
+
+
+def _export2onnx(input_model, output_model, output_model_simple, is_half, metadata):
     cpt = torch.load(input_model, map_location="cpu")
     if is_half:
         dev = torch.device("cuda", index=0)
     else:
         dev = torch.device("cpu")

-    if metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_RVC:
+    # EnumInferenceTypesのままだとシリアライズできないのでテキスト化 (EnumInferenceTypes can't be serialized as-is, so compare against the text values)
+    if metadata["modelType"] == EnumInferenceTypes.pyTorchRVC.value:
         net_g_onnx = SynthesizerTrnMs256NSFsid_ONNX(*cpt["config"], is_half=is_half)
-    elif metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI:
+    elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUI.value:
         net_g_onnx = SynthesizerTrnMsNSFsid_webui_ONNX(**cpt["params"], is_half=is_half)
-    elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_RVC:
+    elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCNono.value:
         net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"])
-    elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI:
+    elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUINono.value:
         net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"])
     else:
         print(
             "unknwon::::: ",
             metadata["modelType"],
             EnumInferenceTypes.pyTorchWebUI.value,
         )

     net_g_onnx.eval().to(dev)
     net_g_onnx.load_state_dict(cpt["weight"], strict=False)
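
For orientation, the export path produces two files: a plain ONNX export and a graph simplified with onnx-simplifier (the `from onnxsim import simplify` at the top of this file). A sketch of that flow under stated assumptions: `export_and_simplify` is a hypothetical helper, the paths are placeholders, and the opset version is a guess rather than what this repo pins:

import onnx
import torch
from onnxsim import simplify

def export_and_simplify(model, example_inputs, output_path, output_path_simple):
    # Trace the PyTorch module into an ONNX graph.
    torch.onnx.export(model, example_inputs, output_path, opset_version=17)
    # Run onnx-simplifier and save the smaller graph alongside the original.
    model_onnx = onnx.load(output_path)
    model_simp, ok = simplify(model_onnx)
    assert ok, "onnx-simplifier failed to validate the simplified model"
    onnx.save(model_simp, output_path_simple)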
@@ -24,6 +24,7 @@ class OnnxRVCInferencer(Inferencer):
             self.isHalf = True

         self.model = onnx_session
+        self.setDevice(dev)
         return self

     def infer(
@@ -37,9 +38,9 @@ class OnnxRVCInferencer(Inferencer):
         if pitch is None or pitchf is None:
            raise RuntimeError("[Voice Changer] Pitch or Pitchf is not found.")

-        print("INFER1", self.model.get_providers())
-        print("INFER2", self.model.get_provider_options())
-        print("INFER3", self.model.get_session_options())
+        # print("INFER1", self.model.get_providers())
+        # print("INFER2", self.model.get_provider_options())
+        # print("INFER3", self.model.get_session_options())
         if self.isHalf:
             audio1 = self.model.run(
                 ["audio"],
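
A sketch of the onnxruntime call being made here. The output name "audio" is confirmed by the diff; the input names and dtypes are assumptions about the RVC ONNX export, not verified against the model:

import numpy as np
import onnxruntime

def run_fp16(session: onnxruntime.InferenceSession, feats, p_len, pitch, pitchf, sid):
    # Half-precision models expect fp16 feature tensors; index/pitch inputs
    # keep their integer/float32 dtypes.
    return session.run(
        ["audio"],
        {
            "feats": feats.astype(np.float16),
            "p_len": p_len.astype(np.int64),
            "pitch": pitch.astype(np.int64),
            "pitchf": pitchf.astype(np.float32),
            "sid": sid.astype(np.int64),
        },
    )[0]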
@@ -11,6 +11,7 @@ from infer_pack.models import (  # type:ignore
 class RVCInferencer(Inferencer):
     def loadModel(self, file: str, dev: device, isHalf: bool = True):
         super().setProps(EnumInferenceTypes.pyTorchRVC, file, dev, isHalf)
+        print("load inf", file)
         cpt = torch.load(file, map_location="cpu")
         model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)

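
For orientation, the ".pth" checkpoints loaded throughout this diff are plain torch pickles; the key layout ("config", "params", "weight") is inferred from the calls above, and the path below is a placeholder:

import torch

cpt = torch.load("model.pth", map_location="cpu")
print(list(cpt.keys()))      # expect keys like "config" / "params" and "weight"
net_args = cpt["config"]     # positional args for SynthesizerTrnMs256NSFsid
state_dict = cpt["weight"]   # loaded with strict=False in the code above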