wip: support M1 Mac 1

This commit is contained in:
wataru 2023-06-21 09:18:51 +09:00
parent 06a5250f61
commit e6b191abd2
22 changed files with 217 additions and 629 deletions

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
import React, { useMemo, useState } from "react";
import { useAppState } from "../../001_provider/001_AppStateProvider";
import { InitialFileUploadSetting } from "@dannadori/voice-changer-client-js";
import { ModelUploadSetting } from "@dannadori/voice-changer-client-js";
import { useMessageBuilder } from "../../hooks/useMessageBuilder";
import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
@ -51,15 +51,18 @@ export const SampleDownloaderScreen = (props: SampleDownloaderScreenProps) => {
)
const onDownloadSampleClicked = async (id: string) => {
serverSetting.fileUploadSettings[props.targetIndex] = {
...InitialFileUploadSetting,
rvcModel: null,
rvcIndex: null,
const uploadParams: ModelUploadSetting = {
voiceChangerType: "RVC",
slot: props.targetIndex,
isSampleMode: true,
sampleId: id,
isSampleMode: true
files: [],
params: {
rvcIndexDownload: true
}
}
try {
await serverSetting.loadModel(props.targetIndex)
await serverSetting.uploadModel(uploadParams)
} catch (e) {
alert(e)
}

View File

@ -41,6 +41,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
isSampleMode: false,
sampleId: null,
files: [],
params: {}
})
}, [props.targetIndex, voiceChangerType])

View File

@ -51,6 +51,7 @@ export type ModelUploadSetting = {
sampleId: string | null
files: ModelFile[]
params: any
}
export type ModelFileForServer = Omit<ModelFile, "file"> & {
name: string,
@ -341,7 +342,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
}
}, [props.voiceChangerClient])
// Keep the old uploader around for a while, until the migration to the new GUI is done.
const loadModel = useMemo(() => {
return async (slot: number) => {

View File

@ -7,7 +7,7 @@ from const import RVCSampleMode, getSampleJsonAndModelIds
from data.ModelSample import ModelSamples, generateModelSample
from data.ModelSlot import RVCModelSlot
from voice_changer.ModelSlotManager import ModelSlotManager
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
from voice_changer.RVC.RVCModelSlotGenerator import RVCModelSlotGenerator
from downloader.Downloader import download, download_no_tqdm
@ -166,7 +166,7 @@ def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str
slotInfo = modelSlotManager.get_slot_info(targetSlotIndex)
if slotInfo.voiceChangerType == "RVC":
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
RVCModelSlotGenerator._setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
RVCModelSlotGenerator._setInfoByPytorch(slotInfo)
modelSlotManager.save_model_slot(targetSlotIndex, slotInfo)

View File

@ -1,7 +1,6 @@
import sys
import json
import os
import shutil
from typing import Union
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
@ -12,7 +11,7 @@ from restapi.mods.FileUploader import upload_file, concat_file_chunks
from voice_changer.VoiceChangerManager import VoiceChangerManager
from const import MODEL_DIR, UPLOAD_DIR, ModelType
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams
os.makedirs(UPLOAD_DIR, exist_ok=True)
@ -88,26 +87,11 @@ class MMVC_Rest_Fileuploader:
try:
paramDict = json.loads(params)
print("paramDict", paramDict)
if paramDict["voiceChangerType"]:
# For the new uploader
print("NEW UPLOADER")
props: LoadModelParams = LoadModelParams(slot=slot, isHalf=isHalf, params=paramDict)
else:
# For the old uploader
# Change Filepath
newFilesDict = {}
for key, val in paramDict["files"].items():
if val != "-" and val != "":
uploadPath = os.path.join(UPLOAD_DIR, val)
storePath = os.path.join(UPLOAD_DIR, f"{slot}", val)
storeDir = os.path.dirname(storePath)
os.makedirs(storeDir, exist_ok=True)
shutil.move(uploadPath, storePath)
newFilesDict[key] = storePath
paramDict["files"] = newFilesDict
props = LoadModelParams(slot=slot, isHalf=isHalf, params=paramDict)
loadModelparams = LoadModelParams(**paramDict)
loadModelparams.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
# print("paramDict", loadModelparams)
info = self.voiceChangerManager.loadModel(props)
info = self.voiceChangerManager.loadModel(loadModelparams)
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
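
For reference, the rewritten handler expects the params form field to be one JSON object in the new LoadModelParams shape, with files as a list of LoadModelParamFile-shaped entries. A minimal sketch of such a payload for a plain RVC file upload (file names and slot number are illustrative; only the name/kind fields that the RVC slot generator reads are shown per file entry):

import json

params = json.dumps({
    "voiceChangerType": "RVC",
    "slot": 0,
    "isSampleMode": False,
    "sampleId": None,
    "files": [
        {"name": "model.pth", "kind": "rvcModel"},
        {"name": "model.index", "kind": "rvcIndex"},
    ],
    "params": {},
})

The handler then rebuilds typed objects via LoadModelParams(**paramDict) plus one LoadModelParamFile(**x) per file entry, as shown above.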

View File

@ -1,14 +0,0 @@
from fastapi.responses import FileResponse
import os
def mod_get_model(modelFile: str):
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
return FileResponse(path=modelPath)
def mod_delete_model(modelFile: str):
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
os.remove(modelPath)
return {"Model deleted": f"{modelFile}"}

View File

@ -1,23 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_models():
gModels = get_file_list(f'MMVC_Trainer/logs/G*.pth')
dModels = get_file_list(f'MMVC_Trainer/logs/D*.pth')
configs = get_file_list(f'MMVC_Trainer/logs/config.json')
models = []
models.extend(gModels)
models.extend(dModels)
models.extend(configs)
models = [ os.path.basename(x) for x in models]
models = sorted(models)
data = {
"models":models
}
json_compatible_item_data = jsonable_encoder(data)
return JSONResponse(content=json_compatible_item_data)

View File

@ -1,26 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os
MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt"
def mod_get_multi_speaker_setting():
data = {}
if os.path.isfile(MULTI_SPEAKER_SETTING_PATH) == False:
with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
f.write("")
f.flush()
f.close()
with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
setting = f.read()
data["multi_speaker_setting"] = setting
json_compatible_item_data = jsonable_encoder(data)
return JSONResponse(content=json_compatible_item_data)
def mod_post_multi_speaker_setting(setting:str):
with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
f.write(setting)
f.flush()
f.close()
return {"Write Multispeaker setting": f"{setting}"}

View File

@ -1,15 +0,0 @@
import shutil
from restapi.mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH
def mod_delete_speaker(speaker:str):
shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}")
with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
setting = f.readlines()
filtered = filter(lambda x: x.startswith(f"{speaker}|")==False, setting)
with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
f.writelines(list(filtered))
f.flush()
f.close()
return {"Speaker deleted": f"{speaker}"}

View File

@ -1,28 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os, base64
def mod_get_speaker_voice(speaker:str, voice:str):
wav_file = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav'
text_file = f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt'
readable_text_file = f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt'
data = {}
if os.path.exists(wav_file):
with open(wav_file, "rb") as f:
wav_data = f.read()
wav_data_base64 = base64.b64encode(wav_data).decode('utf-8')
data["wav"] = wav_data_base64
if os.path.exists(text_file):
with open(text_file, "r") as f:
text_data = f.read()
data["text"] = text_data
if os.path.exists(readable_text_file):
with open(readable_text_file, "r") as f:
text_data = f.read()
data["readable_text"] = text_data
json_compatible_item_data = jsonable_encoder(data)
return JSONResponse(content=json_compatible_item_data)

View File

@ -1,22 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_speaker_voices(speaker:str):
voices = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/wav/*.wav')
texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/text/*.txt')
readable_texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/*.txt')
items = voices
items.extend(texts)
items.extend(readable_texts)
items = [ os.path.splitext(os.path.basename(x))[0] for x in items]
items = sorted(set(items))
data = {
"voices":items
}
json_compatible_item_data = jsonable_encoder(data)
return JSONResponse(content=json_compatible_item_data)

View File

@ -1,15 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_dir_list
import os
# Create is implemented in the FileUploader.
def mod_get_speakers():
os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True)
speakers = get_dir_list("MMVC_Trainer/dataset/textful/")
data = {
"speakers":sorted(speakers)
}
json_compatible_item_data = jsonable_encoder(data)
return JSONResponse(content=json_compatible_item_data)

View File

@ -1,176 +0,0 @@
import subprocess,os
from restapi.utils.files import get_file_list
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
LOG_DIR = "info"
train_proc = None
SUCCESS = 0
ERROR = -1
### Submodule for Pre train
def sync_exec(cmd:str, log_path:str, cwd=None):
shortCmdStr = cmd[:20]
try:
with open(log_path, 'w') as log_file:
if cwd == None:
proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file)
else:
proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd=cwd)
print(f"{shortCmdStr} returncode:{proc.returncode}")
if proc.returncode != 0:
print(f"{shortCmdStr} exception:")
return (ERROR, f"returncode:{proc.returncode}")
except Exception as e:
print(f"{shortCmdStr} exception:", str(e))
return (ERROR, str(e))
return (SUCCESS, "success")
def sync_exec_with_stdout(cmd:str, log_path:str):
shortCmdStr = cmd[:20]
try:
with open(log_path, 'w') as log_file:
proc = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE,
stderr=log_file, cwd="MMVC_Trainer")
print(f"STDOUT{shortCmdStr}",proc.stdout)
except Exception as e:
print(f"{shortCmdStr} exception:", str(e))
return (ERROR, str(e))
return (SUCCESS, proc.stdout)
def create_dataset():
cmd = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt"
log_file = os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt")
res = sync_exec(cmd, log_file, "MMVC_Trainer")
return res
def set_batch_size(batch:int):
cmd = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json"
log_file = os.path.join(LOG_DIR, "log_set_batch_size.txt")
res = sync_exec(cmd, log_file)
return res
def set_dummy_device_count():
cmd = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py'
log_file = os.path.join(LOG_DIR, "log_set_dummy_device_count.txt")
res = sync_exec(cmd, log_file)
return res
### Submodule for Train
def exec_training(enable_finetuning:bool, GModel:str, DModel:str):
global train_proc
log_file = os.path.join(LOG_DIR, "training.txt")
# Check whether training is already running (avoid a double launch)
if train_proc != None:
status = train_proc.poll()
if status != None:
print("Training have ended.", status)
train_proc = None
else:
print("Training have stated.")
return (ERROR, "Training have started")
try:
with open(log_file, 'w') as log_file:
if enable_finetuning == True:
GModelPath = os.path.join("logs", GModel)  # cwd is specified at run time, so the "logs" folder is sufficient here.
DModelPath = os.path.join("logs", DModel)
cmd = f'python3 train_ms.py -c configs/train_config.json -m ./ -fg {GModelPath} -fd {DModelPath}'
else:
cmd = 'python3 train_ms.py -c configs/train_config.json -m ./'
print("exec:",cmd)
train_proc = subprocess.Popen("exec "+cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer")
print("Training stated")
print(f"returncode:{train_proc.returncode}")
except Exception as e:
print("start training exception:", str(e))
return (ERROR, str(e))
return (SUCCESS, "success")
def stop_training():
global train_proc
if train_proc == None:
print("Training have not stated.")
return (ERROR, "Training have not stated.")
status = train_proc.poll()
if status != None:
print("Training have already ended.", status)
train_proc = None
return (ERROR, "Training have already ended. " + status)
else:
train_proc.kill()
print("Training have stoped.")
return (SUCCESS, "success")
### Main
def mod_post_pre_training(batch:int):
res = set_batch_size(batch)
if res[0] == ERROR:
return {"result":"failed", "detail": f"Preprocess(set_batch_size) failed. {res[1]}"}
res = set_dummy_device_count()
if res[0] == ERROR:
return {"result":"failed", "detail": f"Preprocess(set_dummy_device_count) failed. {res[1]}"}
res = create_dataset()
if res[0] == ERROR:
return {"result":"failed", "detail": f"Preprocess failed(create_dataset). {res[1]}"}
return {"result":"success", "detail": f"Preprocess succeeded. {res[1]}"}
def mod_post_start_training(enable_finetuning:str, GModel:str, DModel:str):
print("START_TRAINING:::::::", enable_finetuning, GModel, DModel)
res = exec_training(enable_finetuning, GModel, DModel)
if res[0] == ERROR:
return {"result":"failed", "detail": f"Start training failed. {res[1]}"}
return {"result":"success", "detail": f"Start training succeeded. {res[1]}"}
def mod_post_stop_training():
res = stop_training()
if res[0] == ERROR:
return {"result":"failed", "detail": f"Stop training failed. {res[1]}"}
return {"result":"success", "detail": f"Stop training succeeded. {res[1]}"}
### DEBUG
def mod_get_related_files():
files = get_file_list(os.path.join(LOG_DIR,"*"))
files.extend([
"MMVC_Trainer/dataset/multi_speaker_correspondence.txt",
"MMVC_Trainer/train_ms.py",
])
files.extend(
get_file_list("MMVC_Trainer/configs/*")
)
res = []
for f in files:
size = os.path.getsize(f)
data = ""
if size < 1024*1024:
with open(f, "r") as input:
data = input.read()
res.append({
"name":f,
"size":size,
"data":data
})
json_compatible_item_data = jsonable_encoder(res)
return JSONResponse(content=json_compatible_item_data)
def mod_get_tail_training_log(num:int):
training_log_file = os.path.join(LOG_DIR, "training.txt")
res = sync_exec(f"cat {training_log_file} | sed -e 's/.*\r//' > /tmp/out","/dev/null")
cmd = f'tail -n {num} /tmp/out'
res = sync_exec_with_stdout(cmd, "/dev/null")
if res[0] == ERROR:
return {"result":"failed", "detail": f"Tail training log failed. {res[1]}"}
return {"result":"success", "detail":res[1]}

View File

@ -1,126 +0,0 @@
from const import EnumEmbedderTypes, EnumInferenceTypes
import torch
import onnxruntime
import json
from data.ModelSlot import ModelSlot
def _setInfoByPytorch(slot: ModelSlot):
cpt = torch.load(slot.modelFile, map_location="cpu")
config_len = len(cpt["config"])
if config_len == 18:
# Original RVC
slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1")
if version is None or version == "v1":
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
slot.embChannels = 256
slot.embOutputLayer = 9
slot.useFinalProj = True
slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v1")
else:
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
slot.embChannels = 768
slot.embOutputLayer = 12
slot.useFinalProj = False
slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v2")
else:
# DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
slot.embChannels = cpt["config"][17]
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Print the DDPN model info
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
else:
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
slot.embedder = slot.embedder[:-3]
# if slot.embedder == EnumEmbedderTypes.hubert.value:
# slot.embedder = EnumEmbedderTypes.hubert
# elif slot.embedder == EnumEmbedderTypes.contentvec.value:
# slot.embedder = EnumEmbedderTypes.contentvec
# elif slot.embedder == EnumEmbedderTypes.hubert_jp.value:
# slot.embedder = EnumEmbedderTypes.hubert_jp
# else:
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.samplingRate = cpt["config"][-1]
del cpt
def _setInfoByONNX(slot: ModelSlot):
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
modelmeta = tmp_onnx_session.get_modelmeta()
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Print the ONNX model info
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] ONNX Model: Official v1 like")
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] ONNX Model: Official v2 like")
else:
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert.value
else:
slot.embedder = metadata["embedder"]
# elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
# slot.embedder = EnumEmbedderTypes.hubert
# elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value:
# slot.embedder = EnumEmbedderTypes.contentvec
# elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value:
# slot.embedder = EnumEmbedderTypes.hubert_jp
# else:
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"]
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False
except Exception as e:
slot.modelType = EnumInferenceTypes.onnxRVC.value
slot.embChannels = 256
slot.embedder = EnumEmbedderTypes.hubert.value
slot.f0 = True
slot.samplingRate = 48000
slot.deprecated = True
print("[Voice Changer] setInfoByONNX", e)
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
del tmp_onnx_session

View File

@ -20,14 +20,10 @@ if sys.platform.startswith("darwin"):
else:
sys.path.append("RVC")
from voice_changer.RVC.ModelSlotGenerator import (
_setInfoByONNX,
_setInfoByPytorch,
)
from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.utils.LoadModelParams import LoadModelParams2
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerModel import AudioInOut, VoiceChangerModel
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
@ -35,10 +31,10 @@ from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.pipeline.Pipeline import Pipeline
from Exceptions import DeviceCannotSupportHalfPrecisionException, NoModeLoadedException
from Exceptions import DeviceCannotSupportHalfPrecisionException
class RVC:
class RVC(VoiceChangerModel):
initialLoad: bool = True
settings: RVCSettings = RVCSettings()
@ -70,33 +66,11 @@ class RVC:
self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
# Other settings
self.trans = self.slotInfo.defaultTune
self.index_ratio = self.slotInfo.defaultIndexRatio
self.protect = self.slotInfo.defaultProtect
self.samplingRate = self.slotInfo.samplingRate
self.settings.tran = self.slotInfo.defaultTune
self.settings.indexRatio = self.slotInfo.defaultIndexRatio
self.settings.protect = self.slotInfo.defaultProtect
print("[Voice Changer] [RVC] Initializing... done")
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: RVCModelSlot = RVCModelSlot()
for file in props.files:
if file.kind == "rvcModel":
slotInfo.modelFile = file.name
elif file.kind == "rvcIndex":
slotInfo.indexFile = file.name
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 0
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
# slotInfo.iconFile = "/assets/icons/noimage.png"
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
return slotInfo
def update_settings(self, key: str, val: int | float | str):
print("[Voice Changer][RVC]: update_settings", key, val)
if key in self.settings.intData:
@ -123,7 +97,7 @@ class RVC:
return data
def get_processing_sampling_rate(self):
return self.settings.modelSamplingRate
return self.slotInfo.samplingRate
def generate_input(
self,
@ -170,14 +144,6 @@ class RVC:
return (audio_buffer, convertSize, vol)
def inference(self, data):
# if self.settings.modelSlotIndex < 0:
# print(
# "[Voice Changer] wait for loading model...",
# self.settings.modelSlotIndex,
# self.currentSlot,
# )
# raise NoModeLoadedException("model_common")
audio = data[0]
convertSize = data[1]
vol = data[2]
@ -185,17 +151,13 @@ class RVC:
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16)
audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99)
audio = torchaudio.functional.resample(audio, self.slotInfo.samplingRate, 16000, rolloff=0.99)
repeat = 1 if self.settings.rvcQuality else 0
sid = 0
f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio
protect = self.settings.protect
# if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
# embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer
# useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
if_f0 = 1 if self.slotInfo.f0 else 0
embOutputLayer = self.slotInfo.embOutputLayer
useFinalProj = self.slotInfo.useFinalProj
@ -207,7 +169,7 @@ class RVC:
f0_up_key,
index_rate,
if_f0,
self.settings.extraConvertSize / self.settings.modelSamplingRate,  # extraDataSize in seconds; processed at the RVC model's sampling rate (★1).
self.settings.extraConvertSize / self.slotInfo.samplingRate,  # extraDataSize in seconds; processed at the RVC model's sampling rate (★1).
embOutputLayer,
useFinalProj,
repeat,
@ -292,36 +254,6 @@ class RVC:
# self.settings.modelSlotIndex = targetSlot
# self.currentSlot = self.settings.modelSlotIndex
# def update_model_default(self):
# # {"slot":9,"key":"name","val":"dogsdododg"}
# self.modelSlotManager.update_model_info(
# json.dumps(
# {
# "slot": self.currentSlot,
# "key": "defaultTune",
# "val": self.settings.tran,
# }
# )
# )
# self.modelSlotManager.update_model_info(
# json.dumps(
# {
# "slot": self.currentSlot,
# "key": "defaultIndexRatio",
# "val": self.settings.indexRatio,
# }
# )
# )
# self.modelSlotManager.update_model_info(
# json.dumps(
# {
# "slot": self.currentSlot,
# "key": "defaultProtect",
# "val": self.settings.protect,
# }
# )
# )
def get_model_current(self):
return [
{
@ -337,9 +269,3 @@ class RVC:
"val": self.settings.protect,
},
]
# def update_model_info(self, newData: str):
# self.modelSlotManager.update_model_info(newData)
# def upload_model_assets(self, params: str):
# self.modelSlotManager.store_model_assets(params)
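
Note that inference() above now derives its rates from slotInfo.samplingRate instead of settings.modelSamplingRate. A standalone sketch of the resample step it performs, using the same torchaudio call as the diff (the input tensor and rates are illustrative):

import torch
import torchaudio

slot_sampling_rate = 48000               # illustrative; would come from slotInfo.samplingRate
audio = torch.zeros(slot_sampling_rate)  # one second of (silent) audio at that rate

# Downsample to the 16 kHz the embedder expects, as inference() does.
audio_16k = torchaudio.functional.resample(audio, slot_sampling_rate, 16000, rolloff=0.99)
print(audio_16k.shape)  # torch.Size([16000])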

View File

@ -0,0 +1,152 @@
import os
from const import EnumEmbedderTypes, EnumInferenceTypes
import torch
import onnxruntime
import json
from data.ModelSlot import ModelSlot, RVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
class RVCModelSlotGenerator(ModelSlotGenerator):
@classmethod
def loadModel(cls, props: LoadModelParams):
slotInfo: RVCModelSlot = RVCModelSlot()
for file in props.files:
if file.kind == "rvcModel":
slotInfo.modelFile = file.name
elif file.kind == "rvcIndex":
slotInfo.indexFile = file.name
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 0
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
# slotInfo.iconFile = "/assets/icons/noimage.png"
if slotInfo.isONNX:
cls._setInfoByONNX(slotInfo)
else:
cls._setInfoByPytorch(slotInfo)
return slotInfo
@classmethod
def _setInfoByPytorch(cls, slot: ModelSlot):
cpt = torch.load(slot.modelFile, map_location="cpu")
config_len = len(cpt["config"])
if config_len == 18:
# Original RVC
slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1")
if version is None or version == "v1":
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
slot.embChannels = 256
slot.embOutputLayer = 9
slot.useFinalProj = True
slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v1")
else:
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
slot.embChannels = 768
slot.embOutputLayer = 12
slot.useFinalProj = False
slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v2")
else:
# DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
slot.embChannels = cpt["config"][17]
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Print the DDPN model info
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
else:
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
slot.embedder = slot.embedder[:-3]
# if slot.embedder == EnumEmbedderTypes.hubert.value:
# slot.embedder = EnumEmbedderTypes.hubert
# elif slot.embedder == EnumEmbedderTypes.contentvec.value:
# slot.embedder = EnumEmbedderTypes.contentvec
# elif slot.embedder == EnumEmbedderTypes.hubert_jp.value:
# slot.embedder = EnumEmbedderTypes.hubert_jp
# else:
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.samplingRate = cpt["config"][-1]
del cpt
@classmethod
def _setInfoByONNX(cls, slot: ModelSlot):
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
modelmeta = tmp_onnx_session.get_modelmeta()
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# Print the ONNX model info
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] ONNX Model: Official v1 like")
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] ONNX Model: Official v2 like")
else:
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert.value
else:
slot.embedder = metadata["embedder"]
# elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
# slot.embedder = EnumEmbedderTypes.hubert
# elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value:
# slot.embedder = EnumEmbedderTypes.contentvec
# elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value:
# slot.embedder = EnumEmbedderTypes.hubert_jp
# else:
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"]
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False
except Exception as e:
slot.modelType = EnumInferenceTypes.onnxRVC.value
slot.embChannels = 256
slot.embedder = EnumEmbedderTypes.hubert.value
slot.f0 = True
slot.samplingRate = 48000
slot.deprecated = True
print("[Voice Changer] setInfoByONNX", e)
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
del tmp_onnx_session
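
To see which branch _setInfoByPytorch will take for a given checkpoint, one can inspect the same keys it reads. A sketch, assuming model.pth is an RVC checkpoint on disk (the path is hypothetical):

import torch

cpt = torch.load("model.pth", map_location="cpu")

# 18 config entries means an official RVC checkpoint; anything else is treated as DDPN/WebUI.
print("config entries:", len(cpt["config"]))
print("f0 model:", cpt["f0"] == 1)
print("version:", cpt.get("version", "v1"))  # v1 -> 256 ch / layer 9, v2 -> 768 ch / layer 12
print("sampling rate:", cpt["config"][-1])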

View File

@ -7,12 +7,9 @@ class RVCSettings:
dstId: int = 0
f0Detector: str = "harvest" # dio or harvest
tran: int = 20
tran: int = 12
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 32
clusterInferRatio: float = 0.1
framework: str = "PyTorch" # PyTorch or ONNX
extraConvertSize: int = 1024 * 4
indexRatio: float = 0
protect: float = 0.5
@ -21,8 +18,8 @@ class RVCSettings:
modelSamplingRate: int = 48000
speakers: dict[str, int] = field(default_factory=lambda: {})
isHalf: int = 1 # 0:off, 1:on
enableDirectML: int = 0 # 0:off, 1:on
# isHalf: int = 1 # 0:off, 1:on
# enableDirectML: int = 0 # 0:off, 1:on
# ↓ Only mutable items are listed below
intData = [
"gpu",
@ -30,11 +27,7 @@ class RVCSettings:
"tran",
"extraConvertSize",
"rvcQuality",
"modelSamplingRate",
"silenceFront",
"modelSlotIndex",
"isHalf",
"enableDirectML",
]
floatData = ["silentThreshold", "indexRatio", "protect"]
strData = ["framework", "f0Detector"]
strData = ["f0Detector"]
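
These lists feed update_settings in RVC.py above: a key is accepted, and its value coerced, according to the list it appears in, so deleting framework, isHalf, and enableDirectML here is what actually retires those settings. A plausible minimal version of that membership-based dispatch (simplified; not the literal method body):

from dataclasses import dataclass

@dataclass
class MiniSettings:
    # Reduced stand-in for RVCSettings, for illustration only.
    tran: int = 12
    indexRatio: float = 0.0
    f0Detector: str = "harvest"
    intData = ["tran"]
    floatData = ["indexRatio"]
    strData = ["f0Detector"]

def update_settings(settings: MiniSettings, key: str, val) -> bool:
    # Coerce by list membership, mirroring the checks in RVC.update_settings.
    if key in settings.intData:
        setattr(settings, key, int(val))
    elif key in settings.floatData:
        setattr(settings, key, float(val))
    elif key in settings.strData:
        setattr(settings, key, str(val))
    else:
        return False
    return True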

View File

@ -7,7 +7,7 @@ from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
from voice_changer.ModelSlotManager import ModelSlotManager
from voice_changer.VoiceChanger import VoiceChanger
from const import UPLOAD_DIR, ModelType
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams, LoadModelParams2
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from dataclasses import dataclass, asdict, field
@ -85,20 +85,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
cls._instance.voiceChanger = VoiceChanger(params)
return cls._instance
def loadModel(self, props: LoadModelParams):
paramDict = props.params
if paramDict["sampleId"] is not None:
def loadModel(self, params: LoadModelParams):
if params.isSampleMode:
# Sample download
downloadSample(self.params.sample_mode, paramDict["sampleId"], self.params.model_dir, props.slot, {"useIndex": paramDict["rvcIndexDownload"]})
downloadSample(self.params.sample_mode, params.sampleId, self.params.model_dir, params.slot, {"useIndex": params.params["rvcIndexDownload"]})
self.modelSlotManager.getAllSlotInfo(reload=True)
info = {"status": "OK"}
return info
elif paramDict["voiceChangerType"]:
# New uploader
# Unpack the data
params = LoadModelParams2(**paramDict)
params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
else:
# Uploader
# Copy the files into the slot
for file in params.files:
print("FILE", file)
@ -116,42 +111,32 @@ class VoiceChangerManager(ServerDeviceCallbacks):
# Create the metadata (defined by each VC)
if params.voiceChangerType == "RVC":
from voice_changer.RVC.RVC import RVC  # If imported at startup, the parameters cannot be picked up.
from voice_changer.RVC.RVCModelSlotGenerator import RVCModelSlotGenerator  # If imported at startup, the parameters cannot be picked up.
slotInfo = RVC.loadModel2(params)
slotInfo = RVCModelSlotGenerator.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv13":
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
slotInfo = MMVCv13.loadModel2(params)
slotInfo = MMVCv13.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv15":
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
slotInfo = MMVCv15.loadModel2(params)
slotInfo = MMVCv15.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "so-vits-svc-40":
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
slotInfo = SoVitsSvc40.loadModel2(params)
slotInfo = SoVitsSvc40.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "DDSP-SVC":
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
slotInfo = DDSP_SVC.loadModel2(params)
slotInfo = DDSP_SVC.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
print("params", params)
else:
# Old uploader
print("[Voice Canger]: upload models........")
info = self.voiceChanger.loadModel(props)
if hasattr(info, "status") and info["status"] == "NG":
return info
else:
info["status"] = "OK"
return info
def get_info(self):
data = asdict(self.settings)
data["gpus"] = self.gpus

View File

@ -1,27 +1,8 @@
from dataclasses import dataclass
from typing import Any
from const import VoiceChangerType
from typing import Literal, TypeAlias
@dataclass
class FilePaths:
configFilename: str | None
pyTorchModelFilename: str | None
onnxModelFilename: str | None
clusterTorchModelFilename: str | None
featureFilename: str | None
indexFilename: str | None
@dataclass
class LoadModelParams:
slot: int
isHalf: bool
params: Any
LoadModelParamFileKind: TypeAlias = Literal[
"mmvcv13Config",
"mmvcv13Model",
@ -47,9 +28,10 @@ class LoadModelParamFile:
@dataclass
class LoadModelParams2:
class LoadModelParams:
voiceChangerType: VoiceChangerType
slot: int
isSampleMode: bool
sampleId: str
files: list[LoadModelParamFile]
params: dict

View File

@ -0,0 +1,9 @@
from typing import Protocol
from voice_changer.utils.LoadModelParams import LoadModelParams
class ModelSlotGenerator(Protocol):
@classmethod
def loadModel(cls, params: LoadModelParams):
...
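
Since ModelSlotGenerator is a Protocol, generators such as RVCModelSlotGenerator need no inheritance; any class with a structurally matching classmethod satisfies it. A hypothetical generator for a new voice-changer type, as a sketch:

from voice_changer.utils.LoadModelParams import LoadModelParams

class MyModelSlotGenerator:
    # Hypothetical class; it satisfies the Protocol purely by shape.
    @classmethod
    def loadModel(cls, params: LoadModelParams):
        # Build and return a populated model-slot object here.
        ...

VoiceChangerManager.loadModel can then dispatch to such a class exactly as it does for RVCModelSlotGenerator.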

View File

@ -21,10 +21,8 @@ class VoiceChangerModel(Protocol):
def inference(self, data: tuple[Any, ...]) -> Any:
...
def generate_input(
self, newData: AudioInOut, inputSize: int, crossfadeSize: int
) -> tuple[Any, ...]:
def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int, solaSearchFrame: int) -> tuple[Any, ...]:
...
def update_settings(self, key: str, val: Any) -> bool:
def update_settings(self, key: str, val: int | float | str) -> bool:
...