mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 21:45:00 +03:00
Beatrice
This commit is contained in:
parent
5b43daa705
commit
50f963ff6b
3
.gitignore
vendored
3
.gitignore
vendored
@ -61,3 +61,6 @@ start_trainer.sh
|
|||||||
|
|
||||||
# venv
|
# venv
|
||||||
venv/
|
venv/
|
||||||
|
|
||||||
|
|
||||||
|
beatrice_internal_api.cp310-win_amd64.pyd
|
11
client/demo/dist/index.html
vendored
11
client/demo/dist/index.html
vendored
@ -1 +1,10 @@
|
|||||||
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
|
<!DOCTYPE html>
|
||||||
|
<html style="width: 100%; height: 100%; overflow: hidden">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Voice Changer Client Demo</title>
|
||||||
|
<script defer src="index.js"></script></head>
|
||||||
|
<body style="width: 100%; height: 100%; margin: 0px">
|
||||||
|
<div id="app" style="width: 100%; height: 100%"></div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
1244
client/demo/dist/index.js
vendored
1244
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
3043
client/demo/dist/index.js.LICENSE.txt
vendored
3043
client/demo/dist/index.js.LICENSE.txt
vendored
File diff suppressed because it is too large
Load Diff
@ -111,6 +111,11 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
|
|||||||
return x.kind == "diffusionSVCModel";
|
return x.kind == "diffusionSVCModel";
|
||||||
});
|
});
|
||||||
return enough;
|
return enough;
|
||||||
|
} else if (setting.voiceChangerType == "Beatrice") {
|
||||||
|
const enough = !!setting.files.find((x) => {
|
||||||
|
return x.kind == "beatriceModel";
|
||||||
|
});
|
||||||
|
return enough;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
@ -170,6 +175,8 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
|
|||||||
rows.push(generateFileRow(uploadSetting!, "Model(diff)", "ddspSvcDiffusion", ["pth", "pt"], "diff/"));
|
rows.push(generateFileRow(uploadSetting!, "Model(diff)", "ddspSvcDiffusion", ["pth", "pt"], "diff/"));
|
||||||
} else if (vcType == "Diffusion-SVC") {
|
} else if (vcType == "Diffusion-SVC") {
|
||||||
rows.push(generateFileRow(uploadSetting!, "Model(combo)", "diffusionSVCModel", ["ptc"]));
|
rows.push(generateFileRow(uploadSetting!, "Model(combo)", "diffusionSVCModel", ["ptc"]));
|
||||||
|
} else if (vcType == "Beatrice") {
|
||||||
|
rows.push(generateFileRow(uploadSetting!, "Beatrice", "beatriceModel", ["bin"]));
|
||||||
}
|
}
|
||||||
return rows;
|
return rows;
|
||||||
};
|
};
|
||||||
|
@ -10,7 +10,8 @@ export const VoiceChangerType = {
|
|||||||
"so-vits-svc-40": "so-vits-svc-40",
|
"so-vits-svc-40": "so-vits-svc-40",
|
||||||
"DDSP-SVC": "DDSP-SVC",
|
"DDSP-SVC": "DDSP-SVC",
|
||||||
"RVC": "RVC",
|
"RVC": "RVC",
|
||||||
"Diffusion-SVC":"Diffusion-SVC"
|
"Diffusion-SVC":"Diffusion-SVC",
|
||||||
|
"Beatrice": "Beatrice"
|
||||||
|
|
||||||
} as const
|
} as const
|
||||||
export type VoiceChangerType = typeof VoiceChangerType[keyof typeof VoiceChangerType]
|
export type VoiceChangerType = typeof VoiceChangerType[keyof typeof VoiceChangerType]
|
||||||
@ -287,7 +288,15 @@ export type DiffusionSVCModelSlot = ModelSlot & {
|
|||||||
speakers: { [key: number]: string }
|
speakers: { [key: number]: string }
|
||||||
}
|
}
|
||||||
|
|
||||||
export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot | DiffusionSVCModelSlot
|
|
||||||
|
export type BeatriceModelSlot = ModelSlot & {
|
||||||
|
modelFile: string
|
||||||
|
dstId: number
|
||||||
|
|
||||||
|
speakers: { [key: number]: string }
|
||||||
|
}
|
||||||
|
|
||||||
|
export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot | DiffusionSVCModelSlot | BeatriceModelSlot
|
||||||
|
|
||||||
type ServerAudioDevice = {
|
type ServerAudioDevice = {
|
||||||
kind: "audioinput" | "audiooutput",
|
kind: "audioinput" | "audiooutput",
|
||||||
|
@ -29,6 +29,8 @@ export const ModelFileKind = {
|
|||||||
|
|
||||||
"diffusionSVCModel": "diffusionSVCModel",
|
"diffusionSVCModel": "diffusionSVCModel",
|
||||||
|
|
||||||
|
"beatriceModel": "beatriceModel",
|
||||||
|
|
||||||
} as const
|
} as const
|
||||||
export type ModelFileKind = typeof ModelFileKind[keyof typeof ModelFileKind]
|
export type ModelFileKind = typeof ModelFileKind[keyof typeof ModelFileKind]
|
||||||
|
|
||||||
|
@ -11,7 +11,8 @@ VoiceChangerType: TypeAlias = Literal[
|
|||||||
"so-vits-svc-40",
|
"so-vits-svc-40",
|
||||||
"DDSP-SVC",
|
"DDSP-SVC",
|
||||||
"RVC",
|
"RVC",
|
||||||
"Diffusion-SVC"
|
"Diffusion-SVC",
|
||||||
|
"Beatrice",
|
||||||
]
|
]
|
||||||
|
|
||||||
STORED_SETTING_FILE = "stored_setting.json"
|
STORED_SETTING_FILE = "stored_setting.json"
|
||||||
|
@ -124,7 +124,15 @@ class DiffusionSVCModelSlot(ModelSlot):
|
|||||||
embChannels: int = 768
|
embChannels: int = 768
|
||||||
|
|
||||||
|
|
||||||
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot, DiffusionSVCModelSlot]
|
@dataclass
|
||||||
|
class BeatriceModelSlot(ModelSlot):
|
||||||
|
voiceChangerType: VoiceChangerType = "Beatrice"
|
||||||
|
modelFile: str = ""
|
||||||
|
dstId: int = 1
|
||||||
|
speakers: dict = field(default_factory=lambda: {1: "user1", 2: "user2"})
|
||||||
|
|
||||||
|
|
||||||
|
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot, DiffusionSVCModelSlot, BeatriceModelSlot]
|
||||||
|
|
||||||
|
|
||||||
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
|
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
|
||||||
@ -153,6 +161,9 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
|
|||||||
elif slotInfo.voiceChangerType == "Diffusion-SVC":
|
elif slotInfo.voiceChangerType == "Diffusion-SVC":
|
||||||
slotInfoKey.extend(list(DiffusionSVCModelSlot.__annotations__.keys()))
|
slotInfoKey.extend(list(DiffusionSVCModelSlot.__annotations__.keys()))
|
||||||
return DiffusionSVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
|
return DiffusionSVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
|
||||||
|
elif slotInfo.voiceChangerType == "Beatrice":
|
||||||
|
slotInfoKey.extend(list(BeatriceModelSlot.__annotations__.keys()))
|
||||||
|
return BeatriceModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
|
||||||
else:
|
else:
|
||||||
return ModelSlot()
|
return ModelSlot()
|
||||||
|
|
||||||
@ -168,6 +179,7 @@ def loadAllSlotInfo(model_dir: str):
|
|||||||
|
|
||||||
def saveSlotInfo(model_dir: str, slotIndex: int, slotInfo: ModelSlots):
|
def saveSlotInfo(model_dir: str, slotIndex: int, slotInfo: ModelSlots):
|
||||||
slotDir = os.path.join(model_dir, str(slotIndex))
|
slotDir = os.path.join(model_dir, str(slotIndex))
|
||||||
|
print("SlotInfo:::", slotInfo)
|
||||||
slotInfoDict = asdict(slotInfo)
|
slotInfoDict = asdict(slotInfo)
|
||||||
slotInfo.slotIndex = -1 # スロットインデックスは動的に注入
|
slotInfo.slotIndex = -1 # スロットインデックスは動的に注入
|
||||||
json.dump(slotInfoDict, open(os.path.join(slotDir, "params.json"), "w"), indent=4)
|
json.dump(slotInfoDict, open(os.path.join(slotDir, "params.json"), "w"), indent=4)
|
||||||
|
59
server/voice_changer/Beatrice/Beatrice.py
Normal file
59
server/voice_changer/Beatrice/Beatrice.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
from typing import Union
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
from data.ModelSlot import BeatriceModelSlot
|
||||||
|
from mods.log_control import VoiceChangaerLogger
|
||||||
|
|
||||||
|
from voice_changer.utils.VoiceChangerModel import AudioInOut, VoiceChangerModel
|
||||||
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
|
|
||||||
|
from beatrice_internal_api import BeatriceInternalAPI
|
||||||
|
|
||||||
|
logger = VoiceChangaerLogger.get_instance().getLogger()
|
||||||
|
|
||||||
|
|
||||||
|
class BeatriceAPI(BeatriceInternalAPI):
|
||||||
|
def __init__(self, sample_rate: float = 48000.0):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class Beatrice(VoiceChangerModel):
|
||||||
|
def __init__(self, params: VoiceChangerParams, slotInfo: BeatriceModelSlot):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def initialize(self):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def setSamplingRate(self, inputSampleRate, outputSampleRate):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def update_settings(self, key: str, val: int | float | str):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def get_info(self):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def get_processing_sampling_rate(self):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def generate_input(
|
||||||
|
self,
|
||||||
|
newData: AudioInOut,
|
||||||
|
crossfadeSize: int,
|
||||||
|
solaSearchFrame: int = 0,
|
||||||
|
):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
|
||||||
|
raise RuntimeError("not implemented")
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
del self.pipeline
|
||||||
|
|
||||||
|
def get_model_current(self):
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"key": "dstId",
|
||||||
|
"val": self.settings.dstId,
|
||||||
|
},
|
||||||
|
]
|
17
server/voice_changer/Beatrice/BeatriceModelSlotGenerator.py
Normal file
17
server/voice_changer/Beatrice/BeatriceModelSlotGenerator.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from data.ModelSlot import BeatriceModelSlot
|
||||||
|
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||||
|
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
|
||||||
|
|
||||||
|
|
||||||
|
class BeatriceModelSlotGenerator(ModelSlotGenerator):
|
||||||
|
@classmethod
|
||||||
|
def loadModel(cls, props: LoadModelParams):
|
||||||
|
slotInfo: BeatriceModelSlot = BeatriceModelSlot()
|
||||||
|
for file in props.files:
|
||||||
|
if file.kind == "beatriceModel":
|
||||||
|
slotInfo.modelFile = file.name
|
||||||
|
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
|
||||||
|
slotInfo.slotIndex = props.slot
|
||||||
|
return slotInfo
|
16
server/voice_changer/Beatrice/BeatriceSettings.py
Normal file
16
server/voice_changer/Beatrice/BeatriceSettings.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class BeatriceSettings:
|
||||||
|
# gpu: int = -9999
|
||||||
|
dstId: int = 0
|
||||||
|
modelSamplingRate: int = 48000
|
||||||
|
silentThreshold: float = 0.00001
|
||||||
|
speakers: dict[str, int] = field(default_factory=lambda: {})
|
||||||
|
intData = [
|
||||||
|
# "gpu",
|
||||||
|
"dstId",
|
||||||
|
]
|
||||||
|
floatData = ["silentThreshold"]
|
||||||
|
strData = []
|
@ -1,8 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
from dataclasses import asdict
|
from data.ModelSlot import DiffusionSVCModelSlot, ModelSlot
|
||||||
|
|
||||||
|
|
||||||
from data.ModelSlot import DiffusionSVCModelSlot, ModelSlot, RVCModelSlot
|
|
||||||
from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.diffusion.unit2mel import load_model_vocoder_from_combo
|
from voice_changer.DiffusionSVC.inferencer.diffusion_svc_model.diffusion.unit2mel import load_model_vocoder_from_combo
|
||||||
from voice_changer.VoiceChangerParamsManager import VoiceChangerParamsManager
|
from voice_changer.VoiceChangerParamsManager import VoiceChangerParamsManager
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||||
|
@ -63,7 +63,7 @@ class RVCModelSlotGenerator(ModelSlotGenerator):
|
|||||||
# elif slot.embedder == "hubert_jp":
|
# elif slot.embedder == "hubert_jp":
|
||||||
# slot.embedder = "hubert_jp"
|
# slot.embedder = "hubert_jp"
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
|
raise RuntimeError("[Voice Changer][setInfoByPytorch] unknown embedder")
|
||||||
|
|
||||||
elif config_len == 18:
|
elif config_len == 18:
|
||||||
# Original RVC
|
# Original RVC
|
||||||
|
@ -185,6 +185,11 @@ class VoiceChangerManager(ServerDeviceCallbacks):
|
|||||||
|
|
||||||
slotInfo = DiffusionSVCModelSlotGenerator.loadModel(params)
|
slotInfo = DiffusionSVCModelSlotGenerator.loadModel(params)
|
||||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||||
|
elif params.voiceChangerType == "Beatrice":
|
||||||
|
from voice_changer.Beatrice.BeatriceModelSlotGenerator import BeatriceModelSlotGenerator
|
||||||
|
|
||||||
|
slotInfo = BeatriceModelSlotGenerator.loadModel(params)
|
||||||
|
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||||
logger.info(f"params, {params}")
|
logger.info(f"params, {params}")
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
@ -267,6 +272,13 @@ class VoiceChangerManager(ServerDeviceCallbacks):
|
|||||||
self.voiceChangerModel = DiffusionSVC(self.params, slotInfo)
|
self.voiceChangerModel = DiffusionSVC(self.params, slotInfo)
|
||||||
self.voiceChanger = VoiceChangerV2(self.params)
|
self.voiceChanger = VoiceChangerV2(self.params)
|
||||||
self.voiceChanger.setModel(self.voiceChangerModel)
|
self.voiceChanger.setModel(self.voiceChangerModel)
|
||||||
|
elif slotInfo.voiceChangerType == "Beatrice":
|
||||||
|
logger.info("................Beatrice")
|
||||||
|
from voice_changer.Beatrice.Beatrice import Beatrice
|
||||||
|
|
||||||
|
self.voiceChangerModel = Beatrice(self.params, slotInfo)
|
||||||
|
self.voiceChanger = VoiceChangerV2(self.params)
|
||||||
|
self.voiceChanger.setModel(self.voiceChangerModel)
|
||||||
else:
|
else:
|
||||||
logger.info(f"[Voice Changer] unknown voice changer model: {slotInfo.voiceChangerType}")
|
logger.info(f"[Voice Changer] unknown voice changer model: {slotInfo.voiceChangerType}")
|
||||||
if hasattr(self, "voiceChangerModel"):
|
if hasattr(self, "voiceChangerModel"):
|
||||||
|
@ -18,6 +18,7 @@ import numpy as np
|
|||||||
from dataclasses import dataclass, asdict, field
|
from dataclasses import dataclass, asdict, field
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
from mods.log_control import VoiceChangaerLogger
|
from mods.log_control import VoiceChangaerLogger
|
||||||
|
from voice_changer.Beatrice.Beatrice import Beatrice
|
||||||
|
|
||||||
from voice_changer.IORecorder import IORecorder
|
from voice_changer.IORecorder import IORecorder
|
||||||
|
|
||||||
@ -89,12 +90,17 @@ class VoiceChangerV2(VoiceChangerIF):
|
|||||||
self.prev_audio = np.zeros(4096)
|
self.prev_audio = np.zeros(4096)
|
||||||
self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
||||||
self.onnx_device = onnxruntime.get_device()
|
self.onnx_device = onnxruntime.get_device()
|
||||||
|
self.noCrossFade = False
|
||||||
|
|
||||||
logger.info(f"VoiceChangerV2 Initialized (GPU_NUM(cuda):{self.gpu_num}, mps_enabled:{self.mps_enabled}, onnx_device:{self.onnx_device})")
|
logger.info(f"VoiceChangerV2 Initialized (GPU_NUM(cuda):{self.gpu_num}, mps_enabled:{self.mps_enabled}, onnx_device:{self.onnx_device})")
|
||||||
|
|
||||||
def setModel(self, model: VoiceChangerModel):
|
def setModel(self, model: VoiceChangerModel):
|
||||||
self.voiceChanger = model
|
self.voiceChanger = model
|
||||||
self.voiceChanger.setSamplingRate(self.settings.inputSampleRate, self.settings.outputSampleRate)
|
self.voiceChanger.setSamplingRate(self.settings.inputSampleRate, self.settings.outputSampleRate)
|
||||||
|
if isinstance(model, Beatrice):
|
||||||
|
self.noCrossFade = True
|
||||||
|
else:
|
||||||
|
self.noCrossFade = False
|
||||||
|
|
||||||
def setInputSampleRate(self, sr: int):
|
def setInputSampleRate(self, sr: int):
|
||||||
self.settings.inputSampleRate = sr
|
self.settings.inputSampleRate = sr
|
||||||
@ -202,8 +208,18 @@ class VoiceChangerV2(VoiceChangerIF):
|
|||||||
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
|
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
|
||||||
|
|
||||||
with Timer("main-process") as t:
|
with Timer("main-process") as t:
|
||||||
|
|
||||||
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
|
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
|
||||||
|
|
||||||
|
if self.noCrossFade: # Beatrice
|
||||||
|
audio = self.voiceChanger.inference(
|
||||||
|
receivedData,
|
||||||
|
crossfade_frame=0,
|
||||||
|
sola_search_frame=0,
|
||||||
|
)
|
||||||
|
# block_frame = receivedData.shape[0]
|
||||||
|
# result = audio[:block_frame]
|
||||||
|
result = audio
|
||||||
|
else:
|
||||||
sola_search_frame = int(0.012 * processing_sampling_rate)
|
sola_search_frame = int(0.012 * processing_sampling_rate)
|
||||||
block_frame = receivedData.shape[0]
|
block_frame = receivedData.shape[0]
|
||||||
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
|
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
|
||||||
|
@ -19,6 +19,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
|
|||||||
"ddspSvcDiffusion",
|
"ddspSvcDiffusion",
|
||||||
"ddspSvcDiffusionConfig",
|
"ddspSvcDiffusionConfig",
|
||||||
"diffusionSVCModel",
|
"diffusionSVCModel",
|
||||||
|
"beatriceModel",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user