Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-02-03 00:33:57 +03:00)

Commit 7f1cdb9ddc (parent 48846aad7f): update
@@ -63,7 +63,7 @@ class MMVC_Rest_Fileuploader:
     def post_update_settings(
         self, key: str = Form(...), val: Union[int, str, float] = Form(...)
     ):
-        print("post_update_settings", key, val)
+        print("[Voice Changer] update configuration:", key, val)
         info = self.voiceChangerManager.update_settings(key, val)
         json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
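For context, the handler above takes form-encoded fields rather than a JSON body. A minimal client-side sketch follows; the host, port, route path, and the settings key used here are illustrative assumptions, not taken from this commit.

# Hypothetical client for the form-based settings endpoint shown above.
# URL, port, and the "serverReadChunkSize" key are assumptions for illustration.
import requests

res = requests.post(
    "http://localhost:18888/update_settings",
    data={"key": "serverReadChunkSize", "val": 192},  # sent as form fields, matching Form(...)
)
print(res.json())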
@@ -70,7 +70,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str):
     try:
         metadata = json.loads(modelmeta.custom_metadata_map["metadata"])

-        slot.modelType = metadata["modelType"]
+        # slot.modelType = metadata["modelType"]
         slot.embChannels = metadata["embChannels"]

         if "embedder" not in metadata:
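The custom_metadata_map read in this hunk is the standard onnxruntime way to retrieve key/value metadata baked into an ONNX file. A minimal standalone sketch of the same read, with a placeholder model path:

# Minimal sketch: read the JSON metadata that _setInfoByONNX parses above.
# "model.onnx" is a placeholder path.
import json
import onnxruntime

session = onnxruntime.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
modelmeta = session.get_modelmeta()
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
print(metadata.get("embChannels"), metadata.get("embedder"))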
@@ -1,5 +1,6 @@
 import sys
 import os
+from Exceptions import NoModeLoadedException
 from voice_changer.RVC.ModelSlot import ModelSlot
 from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager

@@ -42,7 +43,7 @@ import torch
 import traceback
 import faiss

-from const import TMP_DIR, UPLOAD_DIR, EnumEmbedderTypes
+from const import UPLOAD_DIR, EnumEmbedderTypes


 from voice_changer.RVC.custom_vc_infer_pipeline import VC
@@ -89,9 +90,6 @@ class RVC:
             f"[Voice Changer] RVC new model is uploaded,{target_slot_idx}",
             asdict(modelSlot),
         )
-        """
-        [Voice Changer] RVC new model is uploaded,0 {'pyTorchModelFile': 'upload_dir/0/kurage.pth', 'onnxModelFile': None, 'featureFile': None, 'indexFile': None, 'defaultTrans': 16, 'isONNX': False, 'modelType': <EnumInferenceTypes.pyTorchWebUI: 'pyTorchWebUI'>, 'samplingRate': 48000, 'f0': True, 'embChannels': 768, 'deprecated': False, 'embedder': 'hubert-base-japanese'}
-        """

        # Load only on the first run
        if self.initialLoad:
@@ -139,7 +137,6 @@ class RVC:

        # Create the embedder
        try:
-            print("AFASFDAFDAFDASDFASDFSADFASDFA", half, self.settings.gpu)
            embedder = EmbedderManager.getEmbedder(
                modelSlot.embedder,
                emmbedderFilename,
@@ -179,8 +176,14 @@ class RVC:
     def prepareModel(self, slot: int):
         if slot < 0:
             return self.get_info()
-        print("[Voice Changer] Prepare Model of slot:", slot)
         modelSlot = self.settings.modelSlots[slot]
+        inferencerFilename = (
+            modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile
+        )
+        if inferencerFilename == "":
+            return self.get_info()
+
+        print("[Voice Changer] Prepare Model of slot:", slot)
+
        # Load the inferencer and embedder
        inferencer, embedder = self.createPipeline(modelSlot)
@@ -240,13 +243,13 @@ class RVC:
                 and self.embedder.isHalf == half
             ):
                 print(
-                    "NOT NEED CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!",
+                    "USE EXSISTING PIPELINE",
                     half,
                 )
                 self.embedder.setDevice(dev)
                 self.inferencer.setDevice(dev)
             else:
-                print("CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", half)
+                print("CHAGE TO NEW PIPELINE", half)
                 self.prepareModel(self.settings.modelSlotIndex)
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
@@ -307,7 +310,9 @@ class RVC:
             )
             raise NoModeLoadedException("model_common")
         if self.needSwitch:
-            print(f"Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}")
+            print(
+                f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}"
+            )
             self.currentSlot = self.settings.modelSlotIndex
             self.switchModel()
             self.needSwitch = False
@@ -363,8 +368,8 @@ class RVC:
         return result

     def __del__(self):
-        del self.net_g
-        del self.onnx_session
+        del self.inferencer
+        del self.embedder

         print("---------- REMOVING ---------------")

@@ -383,57 +388,17 @@ class RVC:
         pass

     def export2onnx(self):
-        if hasattr(self, "net_g") is False or self.net_g is None:
-            print("[Voice Changer] export2onnx, No pyTorch session.")
-            return {"status": "ng", "path": ""}
-
-        pyTorchModelFile = self.settings.modelSlots[
-            self.settings.modelSlotIndex
-        ].pyTorchModelFile  # use slot, not currentSlot, so export can run before inference
-
-        if pyTorchModelFile is None:
+        modelSlot = self.settings.modelSlots[self.settings.modelSlotIndex]
+        pyTorchModelFile = modelSlot.pyTorchModelFile
+
+        # Return an error if no PyTorch file exists
+        if pyTorchModelFile is None or pyTorchModelFile == "":
             print("[Voice Changer] export2onnx, No pyTorch filepath.")
             return {"status": "ng", "path": ""}

         import voice_changer.RVC.export2onnx as onnxExporter

-        output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
-        output_file_simple = (
-            os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
-        )
-        output_path = os.path.join(TMP_DIR, output_file)
-        output_path_simple = os.path.join(TMP_DIR, output_file_simple)
-        print(
-            "embChannels",
-            self.settings.modelSlots[self.settings.modelSlotIndex].embChannels,
-        )
-        metadata = {
-            "application": "VC_CLIENT",
-            "version": "1",
-            "modelType": self.settings.modelSlots[
-                self.settings.modelSlotIndex
-            ].modelType,
-            "samplingRate": self.settings.modelSlots[
-                self.settings.modelSlotIndex
-            ].samplingRate,
-            "f0": self.settings.modelSlots[self.settings.modelSlotIndex].f0,
-            "embChannels": self.settings.modelSlots[
-                self.settings.modelSlotIndex
-            ].embChannels,
-            "embedder": self.settings.modelSlots[self.settings.modelSlotIndex].embedder,
-        }
-
-        if torch.cuda.device_count() > 0:
-            onnxExporter.export2onnx(
-                pyTorchModelFile, output_path, output_path_simple, True, metadata
-            )
-        else:
-            print(
-                "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
-            )
-            onnxExporter.export2onnx(
-                pyTorchModelFile, output_path, output_path_simple, False, metadata
-            )
+        output_file_simple = onnxExporter.export2onnx(modelSlot)

         return {
             "status": "ok",
             "path": f"/tmp/{output_file_simple}",
@@ -55,7 +55,7 @@ class VC(object):
                 pitchf, device=self.device, dtype=torch.float
             ).unsqueeze(0)

-        # tensor
+        # adjust tensor type
        feats = torch.from_numpy(audio_pad)
        if self.is_half is True:
            feats = feats.half()
@@ -94,11 +94,9 @@ class VC(object):
                torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
                + (1 - index_rate) * feats
            )

-        #
        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)

-        # pitch extraction
+        # adjust pitch size
        p_len = audio_pad.shape[0] // self.window
        if feats.shape[1] < p_len:
            p_len = feats.shape[1]
@@ -108,47 +106,23 @@ class VC(object):
        p_len = torch.tensor([p_len], device=self.device).long()

        # run inference
-        with torch.no_grad():
-            audio1 = (
-                (inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
-                .data.cpu()
-                .float()
-                .numpy()
-                .astype(np.int16)
-            )
-
-        # if pitch is not None:
-        #     print("INFERENCE 1 ")
-        #     audio1 = (
-        #         (
-        #             inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
-        #             * 32768
-        #         )
-        #         .data.cpu()
-        #         .float()
-        #         .numpy()
-        #         .astype(np.int16)
-        #     )
-        # else:
-        #     if hasattr(inferencer, "infer_pitchless"):
-        #         print("INFERENCE 2 ")
-
-        #         audio1 = (
-        #             (inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768)
-        #             .data.cpu()
-        #             .float()
-        #             .numpy()
-        #             .astype(np.int16)
-        #         )
-        #     else:
-        #         print("INFERENCE 3 ")
-        #         audio1 = (
-        #             (inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768)
-        #             .data.cpu()
-        #             .float()
-        #             .numpy()
-        #             .astype(np.int16)
-        #         )
+        try:
+            with torch.no_grad():
+                audio1 = (
+                    (
+                        inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
+                        * 32768
+                    )
+                    .data.cpu()
+                    .float()
+                    .numpy()
+                    .astype(np.int16)
+                )
+        except RuntimeError as e:
+            if "HALF" in e.__str__().upper():
+                raise HalfPrecisionChangingException()
+            else:
+                raise e

        del feats, p_len, padding_mask
        torch.cuda.empty_cache()
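The new try/except converts a dtype mismatch raised deep inside inference into a domain exception the caller can react to, for example by rebuilding the pipeline in float32. The same pattern in isolation, assuming HalfPrecisionChangingException lives in the repo's Exceptions module and run_inference stands in for inferencer.infer:

# Standalone sketch of the half-precision fallback pattern added above.
# HalfPrecisionChangingException is assumed to come from the repo's Exceptions module;
# run_inference is a stand-in for inferencer.infer.
import torch
from Exceptions import HalfPrecisionChangingException

def infer_or_signal_half_problem(run_inference, *args):
    try:
        with torch.no_grad():
            return run_inference(*args)
    except RuntimeError as e:
        if "HALF" in str(e).upper():
            # let the caller switch the pipeline back to float32 and retry
            raise HalfPrecisionChangingException()
        raise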
@@ -1,7 +1,10 @@
+import os
 import json
 import torch
 from onnxsim import simplify
 import onnx
+from const import TMP_DIR, EnumInferenceTypes
+from voice_changer.RVC.ModelSlot import ModelSlot

 from voice_changer.RVC.onnx.SynthesizerTrnMs256NSFsid_ONNX import (
     SynthesizerTrnMs256NSFsid_ONNX,
@@ -15,24 +18,60 @@ from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsidNono_webui_ONNX import (
 from voice_changer.RVC.onnx.SynthesizerTrnMsNSFsid_webui_ONNX import (
     SynthesizerTrnMsNSFsid_webui_ONNX,
 )
-from .const import RVC_MODEL_TYPE_RVC, RVC_MODEL_TYPE_WEBUI


-def export2onnx(input_model, output_model, output_model_simple, is_half, metadata):
+def export2onnx(modelSlot: ModelSlot):
+    pyTorchModelFile = modelSlot.pyTorchModelFile
+
+    output_file = os.path.splitext(os.path.basename(pyTorchModelFile))[0] + ".onnx"
+    output_file_simple = (
+        os.path.splitext(os.path.basename(pyTorchModelFile))[0] + "_simple.onnx"
+    )
+    output_path = os.path.join(TMP_DIR, output_file)
+    output_path_simple = os.path.join(TMP_DIR, output_file_simple)
+    metadata = {
+        "application": "VC_CLIENT",
+        "version": "2",
+        # EnumInferenceTypes cannot be serialized as-is, so store the text value
+        "modelType": modelSlot.modelType.value,
+        "samplingRate": modelSlot.samplingRate,
+        "f0": modelSlot.f0,
+        "embChannels": modelSlot.embChannels,
+        "embedder": modelSlot.embedder.value,
+    }
+
+    if torch.cuda.device_count() > 0:
+        _export2onnx(pyTorchModelFile, output_path, output_path_simple, True, metadata)
+    else:
+        print(
+            "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
+        )
+        _export2onnx(pyTorchModelFile, output_path, output_path_simple, False, metadata)
+    return output_file_simple
+
+
+def _export2onnx(input_model, output_model, output_model_simple, is_half, metadata):
     cpt = torch.load(input_model, map_location="cpu")
     if is_half:
         dev = torch.device("cuda", index=0)
     else:
         dev = torch.device("cpu")

-    if metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_RVC:
+    # EnumInferenceTypes cannot be serialized as-is, so compare against the text values
+    if metadata["modelType"] == EnumInferenceTypes.pyTorchRVC.value:
         net_g_onnx = SynthesizerTrnMs256NSFsid_ONNX(*cpt["config"], is_half=is_half)
-    elif metadata["f0"] is True and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI:
+    elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUI.value:
         net_g_onnx = SynthesizerTrnMsNSFsid_webui_ONNX(**cpt["params"], is_half=is_half)
-    elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_RVC:
+    elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCNono.value:
         net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"])
-    elif metadata["f0"] is False and metadata["modelType"] == RVC_MODEL_TYPE_WEBUI:
+    elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUINono.value:
         net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"])
+    else:
+        print(
+            "unknwon::::: ",
+            metadata["modelType"],
+            EnumInferenceTypes.pyTorchWebUI.value,
+        )
+
     net_g_onnx.eval().to(dev)
     net_g_onnx.load_state_dict(cpt["weight"], strict=False)
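The metadata dict built in export2onnx above is the writer-side counterpart of the custom_metadata_map that _setInfoByONNX reads earlier in this commit; attaching such a blob to an exported model is typically done through ONNX metadata_props. A minimal sketch with placeholder file names:

# Sketch: attach a JSON metadata blob to an exported ONNX file so that
# onnxruntime's custom_metadata_map can read it back later.
# File names and the dict contents are placeholders.
import json
import onnx

model = onnx.load("exported_simple.onnx")
entry = model.metadata_props.add()
entry.key = "metadata"
entry.value = json.dumps({"application": "VC_CLIENT", "version": "2", "f0": True})
onnx.save(model, "exported_simple_with_meta.onnx")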
@@ -24,6 +24,7 @@ class OnnxRVCInferencer(Inferencer):
        self.isHalf = True

        self.model = onnx_session
+        self.setDevice(dev)
        return self

    def infer(
@@ -37,9 +38,9 @@ class OnnxRVCInferencer(Inferencer):
        if pitch is None or pitchf is None:
            raise RuntimeError("[Voice Changer] Pitch or Pitchf is not found.")

-        print("INFER1", self.model.get_providers())
-        print("INFER2", self.model.get_provider_options())
-        print("INFER3", self.model.get_session_options())
+        # print("INFER1", self.model.get_providers())
+        # print("INFER2", self.model.get_provider_options())
+        # print("INFER3", self.model.get_session_options())
        if self.isHalf:
            audio1 = self.model.run(
                ["audio"],
@@ -11,6 +11,7 @@ from infer_pack.models import (  # type:ignore
 class RVCInferencer(Inferencer):
     def loadModel(self, file: str, dev: device, isHalf: bool = True):
         super().setProps(EnumInferenceTypes.pyTorchRVC, file, dev, isHalf)
+        print("load inf", file)
         cpt = torch.load(file, map_location="cpu")
         model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)
