voice-changer/server/voice_changer/VoiceChangerManager.py

318 lines
13 KiB
Python
Raw Normal View History

2023-06-21 01:23:13 +03:00
import json
2023-06-19 05:40:16 +03:00
import os
import sys
2023-06-19 05:40:16 +03:00
import shutil
2023-06-26 04:03:12 +03:00
import threading
2022-12-31 10:02:53 +03:00
import numpy as np
2023-06-17 09:35:43 +03:00
from downloader.SampleDownloader import downloadSample, getSampleInfos
2023-06-16 09:06:35 +03:00
from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
2023-06-17 08:16:29 +03:00
from voice_changer.ModelSlotManager import ModelSlotManager
2023-06-23 08:54:39 +03:00
from voice_changer.RVC.RVCModelMerger import RVCModelMerger
2022-12-31 10:08:14 +03:00
from voice_changer.VoiceChanger import VoiceChanger
2023-07-12 18:59:48 +03:00
from const import STORED_SETTING_FILE, UPLOAD_DIR
2023-07-14 22:45:27 +03:00
from voice_changer.VoiceChangerV2 import VoiceChangerV2
2023-06-23 08:54:39 +03:00
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams
from voice_changer.utils.ModelMerger import MergeElement, ModelMergerRequest
2023-04-28 00:39:51 +03:00
from voice_changer.utils.VoiceChangerModel import AudioInOut
2023-04-27 17:38:25 +03:00
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
2023-06-21 01:23:13 +03:00
from dataclasses import dataclass, asdict, field
import torch
2023-06-21 01:23:13 +03:00
# import threading
2023-06-16 10:10:46 +03:00
from typing import Callable
from typing import Any
@dataclass
class GPUInfo:
    """Record describing a single GPU.

    NOTE(review): nothing visible in this file instantiates this class;
    ``_get_gpuInfos`` builds plain dicts with the same three keys.
    """

    # Device index.
    id: int
    # Device name.
    name: str
    # Memory size — presumably total device memory; confirm with the producer.
    memory: int
@dataclass
class VoiceChangerManagerSettings:
    """Settings owned by the manager itself (as opposed to a voice changer)."""

    # Index of the selected model slot; -1 is the initial value.
    modelSlotIndex: int = -1

    # Only the mutable settings are listed here. Keys named in this list are
    # converted with int() before being stored (see update_settings).
    intData: list[str] = field(default_factory=lambda: ["modelSlotIndex"])
2022-12-31 10:02:53 +03:00
2023-01-29 03:42:45 +03:00
2023-06-16 09:06:35 +03:00
class VoiceChangerManager(ServerDeviceCallbacks):
2023-04-27 17:38:25 +03:00
_instance = None
2023-06-16 09:06:35 +03:00
############################
# ServerDeviceCallbacks
############################
def on_request(self, unpackedData: AudioInOut):
    """Handle one audio chunk (ServerDeviceCallbacks hook).

    The chunk is forwarded to ``changeVoice`` and its result is returned
    unchanged.
    """
    converted = self.changeVoice(unpackedData)
    return converted
def emitTo(self, performance: list[float]):
    """Forward performance figures to the registered emitter (ServerDeviceCallbacks hook).

    The emitter is only installed by ``setEmitTo``. Until that has been
    called there is nobody to notify, so the data is dropped instead of
    raising ``AttributeError`` on the missing ``emitToFunc`` attribute.

    Args:
        performance: Performance values to hand to the emitter.
    """
    emitToFunc = getattr(self, "emitToFunc", None)
    if emitToFunc is None:
        return
    emitToFunc(performance)
2023-06-16 09:06:35 +03:00
def get_processing_sampling_rate(self):
    """Return the processing sampling rate reported by the current voice changer.

    ServerDeviceCallbacks hook; simply delegates to ``self.voiceChanger``.
    """
    voiceChanger = self.voiceChanger
    return voiceChanger.get_processing_sampling_rate()
def setInputSamplingRate(self, sr: int):
    """Store ``sr`` as the input sample rate of the current voice changer's settings."""
    settings = self.voiceChanger.settings
    settings.inputSampleRate = sr
def setOutputSamplingRate(self, sr: int):
    """Store ``sr`` as the output sample rate of the current voice changer's settings."""
    settings = self.voiceChanger.settings
    settings.outputSampleRate = sr
2023-06-16 09:06:35 +03:00
############################
# VoiceChangerManager
############################
def __init__(self, params: VoiceChangerParams):
    """Set up the manager, start the server device and replay stored settings.

    Args:
        params: Server-wide parameters; ``params.model_dir`` selects the
            directory handed to the model slot manager.
    """
    self.params = params
    # No voice changer exists until a model slot is selected.
    self.voiceChanger: VoiceChanger | None = None
    self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings()

    self.modelSlotManager = ModelSlotManager.get_instance(self.params.model_dir)
    # Collect static information (plain dicts, see _get_gpuInfos).
    self.gpus: list[dict[str, Any]] = self._get_gpuInfos()

    self.serverDevice = ServerDevice(self)
    thread = threading.Thread(target=self.serverDevice.start, args=())
    thread.start()

    # Information used for persisting settings.
    self.stored_setting: dict[str, str | int | float] = {}
    if os.path.exists(STORED_SETTING_FILE):
        # Use a context manager so the file handle is closed deterministically.
        with open(STORED_SETTING_FILE, "r", encoding="utf-8") as f:
            self.stored_setting = json.load(f)
    for key, val in self.stored_setting.items():
        self.update_settings(key, val)
def store_setting(self, key: str, val: str | int | float):
saveItemForServerDevice = ["enableServerAudio", "serverAudioSampleRate", "serverInputDeviceId", "serverOutputDeviceId", "serverMonitorDeviceId", "serverReadChunkSize", "serverInputAudioGain", "serverOutputAudioGain"]
2023-06-25 18:51:25 +03:00
saveItemForVoiceChanger = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapSize"]
saveItemForVoiceChangerManager = ["modelSlotIndex"]
saveItemForRVC = ["extraConvertSize", "gpu", "silentThreshold"]
2023-07-08 20:35:06 +03:00
saveItemForAllVoiceChanger = ["f0Detector"] # 設定されたf0DetectorがVCに存在しない値の場合はデフォルトに落ちるように実装すること
2023-06-25 18:51:25 +03:00
saveItem = []
saveItem.extend(saveItemForServerDevice)
saveItem.extend(saveItemForVoiceChanger)
saveItem.extend(saveItemForVoiceChangerManager)
saveItem.extend(saveItemForRVC)
2023-07-08 20:35:06 +03:00
saveItem.extend(saveItemForAllVoiceChanger)
2023-06-25 18:51:25 +03:00
if key in saveItem:
self.stored_setting[key] = val
json.dump(self.stored_setting, open(STORED_SETTING_FILE, "w"))
def _get_gpuInfos(self):
devCount = torch.cuda.device_count()
gpus = []
for id in range(devCount):
name = torch.cuda.get_device_name(id)
memory = torch.cuda.get_device_properties(id).total_memory
gpu = {"id": id, "name": name, "memory": memory}
gpus.append(gpu)
return gpus
2023-04-27 17:38:25 +03:00
2022-12-31 10:02:53 +03:00
@classmethod
def get_instance(cls, params: VoiceChangerParams):
    """Return the shared instance, creating it from ``params`` on first use.

    ``params`` is only consulted when no instance exists yet; later calls
    return the already-created object regardless of the argument.
    """
    instance = cls._instance
    if instance is None:
        instance = cls(params)
        cls._instance = instance
    return instance
2023-06-21 03:18:51 +03:00
def loadModel(self, params: LoadModelParams):
    """Install a model into a slot, either from the sample catalogue or from uploaded files.

    Sample mode downloads the sample into ``params.slot``, reloads the slot
    list and returns ``{"status": "OK"}``.

    Upload mode moves every file in ``params.files`` from the upload
    directory into the slot directory and rewrites ``file.name`` to the new
    path. The slot generator matching ``params.voiceChangerType`` then builds
    the slot metadata, which is saved. An unrecognised type only moves the
    files. This path returns ``None``.
    """
    if params.isSampleMode:
        # Sample download.
        sampleParams = {"useIndex": params.params["rvcIndexDownload"]}
        downloadSample(self.params.sample_mode, params.sampleId, self.params.model_dir, params.slot, sampleParams)
        self.modelSlotManager.getAllSlotInfo(reload=True)
        return {"status": "OK"}

    # Uploader: move the uploaded files into the slot directory.
    for file in params.files:
        print("FILE", file)
        srcPath = os.path.join(UPLOAD_DIR, file.dir, file.name)
        slotDir = os.path.join(self.params.model_dir, str(params.slot), file.dir)
        dstPath = os.path.join(slotDir, file.name)
        os.makedirs(slotDir, exist_ok=True)
        print(f"move to {srcPath} -> {dstPath}")
        shutil.move(srcPath, dstPath)
        file.name = dstPath

    # Create the metadata (defined by each voice changer). The generators are
    # imported lazily: importing them at startup means the parameters cannot
    # be obtained.
    vcType = params.voiceChangerType
    generator = None
    if vcType == "RVC":
        from voice_changer.RVC.RVCModelSlotGenerator import RVCModelSlotGenerator as generator
    elif vcType == "MMVCv13":
        from voice_changer.MMVCv13.MMVCv13ModelSlotGenerator import MMVCv13ModelSlotGenerator as generator
    elif vcType == "MMVCv15":
        from voice_changer.MMVCv15.MMVCv15ModelSlotGenerator import MMVCv15ModelSlotGenerator as generator
    elif vcType == "so-vits-svc-40":
        from voice_changer.SoVitsSvc40.SoVitsSvc40ModelSlotGenerator import SoVitsSvc40ModelSlotGenerator as generator
    elif vcType == "DDSP-SVC":
        from voice_changer.DDSP_SVC.DDSP_SVCModelSlotGenerator import DDSP_SVCModelSlotGenerator as generator
    elif vcType == "Diffusion-SVC":
        from voice_changer.DiffusionSVC.DiffusionSVCModelSlotGenerator import DiffusionSVCModelSlotGenerator as generator

    if generator is not None:
        slotInfo = generator.loadModel(params)
        self.modelSlotManager.save_model_slot(params.slot, slotInfo)
    print("params", params)
2023-01-07 18:25:21 +03:00
def get_info(self):
    """Assemble the status dictionary describing the current server state.

    Starts from the manager settings and adds GPU list, model slots
    (reloaded), sample models, the Python version and ``"status": "OK"``.
    The server device's info is merged on top, then the voice changer's info
    when one exists; later sources overwrite earlier keys.
    """
    info = {
        **asdict(self.settings),
        "gpus": self.gpus,
        "modelSlots": self.modelSlotManager.getAllSlotInfo(reload=True),
        "sampleModels": getSampleInfos(self.params.sample_mode),
        "python": sys.version,
        "status": "OK",
    }
    info.update(self.serverDevice.get_info())
    if self.voiceChanger is not None:
        info.update(self.voiceChanger.get_info())
    return info
2023-05-06 22:18:18 +03:00
def get_performance(self):
    """Return the current voice changer's performance data.

    ``voiceChanger`` is initialised to ``None`` in ``__init__``, so checking
    for the attribute's existence is not enough to know a model is loaded;
    the value itself has to be tested.

    Returns:
        Whatever ``voiceChanger.get_performance()`` returns, or
        ``{"status": "ERROR", "msg": "no model loaded"}`` when no voice
        changer has been created yet.
    """
    voiceChanger = getattr(self, "voiceChanger", None)
    if voiceChanger is None:
        return {"status": "ERROR", "msg": "no model loaded"}
    return voiceChanger.get_performance()
2023-01-07 18:25:21 +03:00
2023-06-21 01:23:13 +03:00
def generateVoiceChanger(self, val: int):
    """Create the model and voice changer for the model stored in slot ``val``.

    The slot's ``voiceChangerType`` selects the model class. The model is
    built first, then wrapped in a ``VoiceChanger`` (``VoiceChangerV2`` for
    Diffusion-SVC). When the slot is missing nothing changes. When the type
    is unknown, any existing ``voiceChangerModel`` attribute is removed and
    ``voiceChanger`` is left as it was.
    """
    slotInfo = self.modelSlotManager.get_slot_info(val)
    if slotInfo is None:
        print(f"[Voice Changer] model slot is not found {val}")
        return

    vcType = slotInfo.voiceChangerType
    useV2 = False
    # Model classes are imported lazily, only for the type actually needed.
    if vcType == "RVC":
        print("................RVC")
        from voice_changer.RVC.RVC import RVC

        model = RVC(self.params, slotInfo)
    elif vcType == "MMVCv13":
        print("................MMVCv13")
        from voice_changer.MMVCv13.MMVCv13 import MMVCv13

        model = MMVCv13(slotInfo)
    elif vcType == "MMVCv15":
        print("................MMVCv15")
        from voice_changer.MMVCv15.MMVCv15 import MMVCv15

        model = MMVCv15(slotInfo)
    elif vcType == "so-vits-svc-40":
        print("................so-vits-svc-40")
        from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40

        model = SoVitsSvc40(self.params, slotInfo)
    elif vcType == "DDSP-SVC":
        print("................DDSP-SVC")
        from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC

        model = DDSP_SVC(self.params, slotInfo)
    elif vcType == "Diffusion-SVC":
        print("................Diffusion-SVC")
        from voice_changer.DiffusionSVC.DiffusionSVC import DiffusionSVC

        model = DiffusionSVC(self.params, slotInfo)
        useV2 = True
    else:
        print(f"[Voice Changer] unknown voice changer model: {slotInfo.voiceChangerType}")
        if hasattr(self, "voiceChangerModel"):
            del self.voiceChangerModel
        return

    self.voiceChangerModel = model
    changerClass = VoiceChangerV2 if useV2 else VoiceChanger
    self.voiceChanger = changerClass(self.params)
    self.voiceChanger.setModel(self.voiceChangerModel)
2023-04-28 00:39:51 +03:00
def update_settings(self, key: str, val: str | int | float):
2023-06-25 18:51:25 +03:00
self.store_setting(key, val)
2023-06-21 01:23:13 +03:00
if key in self.settings.intData:
newVal = int(val)
if key == "modelSlotIndex":
newVal = newVal % 1000
print(f"[Voice Changer] model slot is changed {self.settings.modelSlotIndex} -> {newVal}")
self.generateVoiceChanger(newVal)
2023-06-25 18:51:25 +03:00
# キャッシュ設定の反映
for k, v in self.stored_setting.items():
if k != "modelSlotIndex":
self.update_settings(k, v)
2023-06-21 01:23:13 +03:00
setattr(self.settings, key, newVal)
2023-06-25 20:19:37 +03:00
self.serverDevice.update_settings(key, val)
if self.voiceChanger is not None:
self.voiceChanger.update_settings(key, val)
2023-06-21 01:23:13 +03:00
return self.get_info()
2023-01-08 10:18:20 +03:00
2023-04-28 00:39:51 +03:00
def changeVoice(self, receivedData: AudioInOut):
    """Convert one audio chunk with the current voice changer.

    ``voiceChanger`` is initialised to ``None`` in ``__init__``, so the value
    itself (not merely the attribute's existence) decides whether a model is
    loaded.

    Args:
        receivedData: Audio chunk to convert.

    Returns:
        The result of ``voiceChanger.on_request(receivedData)``. When no
        voice changer exists yet, a one-sample zero ``int16`` array paired
        with an empty list is returned.
    """
    voiceChanger = getattr(self, "voiceChanger", None)
    if voiceChanger is None:
        print("Voice Change is not loaded. Did you load a correct model?")
        return np.zeros(1).astype(np.int16), []
    return voiceChanger.on_request(receivedData)
2023-04-10 18:21:17 +03:00
2023-04-13 02:00:28 +03:00
def export2onnx(self):
    """Delegate to the current voice changer's ``export2onnx`` and return its result."""
    voiceChanger = self.voiceChanger
    return voiceChanger.export2onnx()
2023-04-30 20:34:01 +03:00
def merge_models(self, request: str):
    """Merge models as described by a JSON request and load the result.

    ``request`` is decoded into a ``ModelMergerRequest`` whose ``files``
    entries become ``MergeElement`` objects. The target slot is the last
    index of the slot list. Only the "RVC" type is handled: the merged file
    is loaded into that slot through ``loadModel``. The fresh info dict is
    returned in every case.
    """
    payload = json.loads(request)
    req = ModelMergerRequest(**payload)
    req.files = [MergeElement(**element) for element in req.files]

    slot = len(self.modelSlotManager.getAllSlotInfo()) - 1
    if req.voiceChangerType == "RVC":
        merged = RVCModelMerger.merge_models(req, slot)
        mergedFile = LoadModelParamFile(name=os.path.basename(merged), kind="rvcModel", dir=f"{slot}")
        loadParam = LoadModelParams(
            voiceChangerType="RVC",
            slot=slot,
            isSampleMode=False,
            sampleId="",
            files=[mergedFile],
            params={},
        )
        self.loadModel(loadParam)
    return self.get_info()
2023-05-20 22:21:54 +03:00
2023-06-17 04:08:32 +03:00
def setEmitTo(self, emitTo: Callable[[Any], None]):
    """Register the callable that ``emitTo`` forwards performance data to."""
    self.emitToFunc = emitTo
2023-05-20 22:21:54 +03:00
def update_model_default(self):
    """Write the model's current settings back to the selected slot.

    Each entry returned by ``voiceChangerModel.get_model_current()`` is
    tagged with the current ``modelSlotIndex`` and passed, JSON-encoded, to
    the slot manager. Returns the fresh info dict.
    """
    for entry in self.voiceChangerModel.get_model_current():
        entry["slot"] = self.settings.modelSlotIndex
        self.modelSlotManager.update_model_info(json.dumps(entry))
    return self.get_info()
2023-06-07 21:08:59 +03:00
def update_model_info(self, newData: str):
    """Pass ``newData`` to the slot manager's ``update_model_info`` and return the fresh info dict."""
    self.modelSlotManager.update_model_info(newData)
    info = self.get_info()
    return info
2023-06-07 21:08:59 +03:00
def upload_model_assets(self, params: str):
    """Pass ``params`` to the slot manager's ``store_model_assets`` and return the fresh info dict."""
    self.modelSlotManager.store_model_assets(params)
    info = self.get_info()
    return info