wip:support m1 mac 1

This commit is contained in:
wataru 2023-06-21 09:18:51 +09:00
parent 06a5250f61
commit e6b191abd2
22 changed files with 217 additions and 629 deletions

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
import React, { useMemo, useState } from "react"; import React, { useMemo, useState } from "react";
import { useAppState } from "../../001_provider/001_AppStateProvider"; import { useAppState } from "../../001_provider/001_AppStateProvider";
import { InitialFileUploadSetting } from "@dannadori/voice-changer-client-js"; import { ModelUploadSetting } from "@dannadori/voice-changer-client-js";
import { useMessageBuilder } from "../../hooks/useMessageBuilder"; import { useMessageBuilder } from "../../hooks/useMessageBuilder";
import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog"; import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
@ -51,15 +51,18 @@ export const SampleDownloaderScreen = (props: SampleDownloaderScreenProps) => {
) )
const onDownloadSampleClicked = async (id: string) => { const onDownloadSampleClicked = async (id: string) => {
serverSetting.fileUploadSettings[props.targetIndex] = { const uploadParams: ModelUploadSetting = {
...InitialFileUploadSetting, voiceChangerType: "RVC",
rvcModel: null, slot: props.targetIndex,
rvcIndex: null, isSampleMode: true,
sampleId: id, sampleId: id,
isSampleMode: true files: [],
params: {
rvcIndexDownload: true
}
} }
try { try {
await serverSetting.loadModel(props.targetIndex) await serverSetting.uploadModel(uploadParams)
} catch (e) { } catch (e) {
alert(e) alert(e)
} }

View File

@ -41,6 +41,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
isSampleMode: false, isSampleMode: false,
sampleId: null, sampleId: null,
files: [], files: [],
params: {}
}) })
}, [props.targetIndex, voiceChangerType]) }, [props.targetIndex, voiceChangerType])

View File

@ -51,6 +51,7 @@ export type ModelUploadSetting = {
sampleId: string | null sampleId: string | null
files: ModelFile[] files: ModelFile[]
params: any
} }
export type ModelFileForServer = Omit<ModelFile, "file"> & { export type ModelFileForServer = Omit<ModelFile, "file"> & {
name: string, name: string,
@ -341,7 +342,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
} }
}, [props.voiceChangerClient]) }, [props.voiceChangerClient])
// 古いアップローダ新GUIへ以降まで、当分残しておく。 // 古いアップローダ新GUIへ以降まで、当分残しておく。
const loadModel = useMemo(() => { const loadModel = useMemo(() => {
return async (slot: number) => { return async (slot: number) => {

View File

@ -7,7 +7,7 @@ from const import RVCSampleMode, getSampleJsonAndModelIds
from data.ModelSample import ModelSamples, generateModelSample from data.ModelSample import ModelSamples, generateModelSample
from data.ModelSlot import RVCModelSlot from data.ModelSlot import RVCModelSlot
from voice_changer.ModelSlotManager import ModelSlotManager from voice_changer.ModelSlotManager import ModelSlotManager
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch from voice_changer.RVC.RVCModelSlotGenerator import RVCModelSlotGenerator
from downloader.Downloader import download, download_no_tqdm from downloader.Downloader import download, download_no_tqdm
@ -166,7 +166,7 @@ def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str
slotInfo = modelSlotManager.get_slot_info(targetSlotIndex) slotInfo = modelSlotManager.get_slot_info(targetSlotIndex)
if slotInfo.voiceChangerType == "RVC": if slotInfo.voiceChangerType == "RVC":
if slotInfo.isONNX: if slotInfo.isONNX:
_setInfoByONNX(slotInfo) RVCModelSlotGenerator._setInfoByONNX(slotInfo)
else: else:
_setInfoByPytorch(slotInfo) RVCModelSlotGenerator._setInfoByPytorch(slotInfo)
modelSlotManager.save_model_slot(targetSlotIndex, slotInfo) modelSlotManager.save_model_slot(targetSlotIndex, slotInfo)

View File

@ -1,7 +1,6 @@
import sys import sys
import json import json
import os import os
import shutil
from typing import Union from typing import Union
from fastapi import APIRouter from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder from fastapi.encoders import jsonable_encoder
@ -12,7 +11,7 @@ from restapi.mods.FileUploader import upload_file, concat_file_chunks
from voice_changer.VoiceChangerManager import VoiceChangerManager from voice_changer.VoiceChangerManager import VoiceChangerManager
from const import MODEL_DIR, UPLOAD_DIR, ModelType from const import MODEL_DIR, UPLOAD_DIR, ModelType
from voice_changer.utils.LoadModelParams import LoadModelParams from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams
os.makedirs(UPLOAD_DIR, exist_ok=True) os.makedirs(UPLOAD_DIR, exist_ok=True)
@ -88,26 +87,11 @@ class MMVC_Rest_Fileuploader:
try: try:
paramDict = json.loads(params) paramDict = json.loads(params)
print("paramDict", paramDict) print("paramDict", paramDict)
if paramDict["voiceChangerType"]: loadModelparams = LoadModelParams(**paramDict)
# 新しいアップローダ用 loadModelparams.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
print("NEW UPLOADER") # print("paramDict", loadModelparams)
props: LoadModelParams = LoadModelParams(slot=slot, isHalf=isHalf, params=paramDict)
else:
# 古いアップローダ用
# Change Filepath
newFilesDict = {}
for key, val in paramDict["files"].items():
if val != "-" and val != "":
uploadPath = os.path.join(UPLOAD_DIR, val)
storePath = os.path.join(UPLOAD_DIR, f"{slot}", val)
storeDir = os.path.dirname(storePath)
os.makedirs(storeDir, exist_ok=True)
shutil.move(uploadPath, storePath)
newFilesDict[key] = storePath
paramDict["files"] = newFilesDict
props = LoadModelParams(slot=slot, isHalf=isHalf, params=paramDict)
info = self.voiceChangerManager.loadModel(props) info = self.voiceChangerManager.loadModel(loadModelparams)
json_compatible_item_data = jsonable_encoder(info) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data) return JSONResponse(content=json_compatible_item_data)
except Exception as e: except Exception as e:

View File

@ -1,14 +0,0 @@
from fastapi.responses import FileResponse
import os
_LOGS_DIR = "MMVC_Trainer/logs"


def mod_get_model(modelFile: str):
    """Serve one model file from the trainer logs directory as a file download."""
    return FileResponse(path=os.path.join(_LOGS_DIR, modelFile))


def mod_delete_model(modelFile: str):
    """Delete one model file from the trainer logs directory and confirm it."""
    os.remove(os.path.join(_LOGS_DIR, modelFile))
    return {"Model deleted": f"{modelFile}"}

View File

@ -1,23 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_models():
    """List G*/D* checkpoints and config.json from the trainer logs as a JSON response."""
    found = []
    for pattern in (
        'MMVC_Trainer/logs/G*.pth',
        'MMVC_Trainer/logs/D*.pth',
        'MMVC_Trainer/logs/config.json',
    ):
        found.extend(get_file_list(pattern))
    # Only basenames are exposed, sorted for a stable listing.
    names = sorted(os.path.basename(p) for p in found)
    payload = {"models": names}
    return JSONResponse(content=jsonable_encoder(payload))

View File

@ -1,26 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os
MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt"


def mod_get_multi_speaker_setting():
    """Return the multi-speaker correspondence text, creating an empty file on first access."""
    if not os.path.isfile(MULTI_SPEAKER_SETTING_PATH):
        # First access: materialize an empty setting file so reads never fail.
        with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
            f.write("")
    with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
        setting = f.read()
    data = {"multi_speaker_setting": setting}
    return JSONResponse(content=jsonable_encoder(data))


def mod_post_multi_speaker_setting(setting: str):
    """Overwrite the multi-speaker correspondence file with the posted text."""
    # The with-block flushes and closes the handle on exit.
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.write(setting)
    return {"Write Multispeaker setting": f"{setting}"}

View File

@ -1,15 +0,0 @@
import shutil
from restapi.mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH
def mod_delete_speaker(speaker: str):
    """Delete a speaker's dataset folder and drop its lines from the correspondence file."""
    shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}")
    # Rewrite the setting file, keeping only lines that belong to other speakers.
    with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
        lines = f.readlines()
    kept = [line for line in lines if not line.startswith(f"{speaker}|")]
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.writelines(kept)
    return {"Speaker deleted": f"{speaker}"}

View File

@ -1,28 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os, base64
def mod_get_speaker_voice(speaker: str, voice: str):
    """Return one recording (base64 wav) plus its transcript files for a speaker, as JSON.

    Each of the three files is optional; only the ones that exist appear in
    the response.
    """
    wav_file = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav'
    text_file = f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt'
    readable_text_file = f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt'
    data = {}
    if os.path.exists(wav_file):
        # Binary audio travels to the client as base64 text.
        with open(wav_file, "rb") as f:
            data["wav"] = base64.b64encode(f.read()).decode('utf-8')
    if os.path.exists(text_file):
        with open(text_file, "r") as f:
            data["text"] = f.read()
    if os.path.exists(readable_text_file):
        with open(readable_text_file, "r") as f:
            data["readable_text"] = f.read()
    return JSONResponse(content=jsonable_encoder(data))

View File

@ -1,22 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_speaker_voices(speaker: str):
    """List the clip ids (deduplicated wav/text/readable_text basenames) for a speaker."""
    collected = []
    for pattern in (
        f'MMVC_Trainer/dataset/textful/{speaker}/wav/*.wav',
        f'MMVC_Trainer/dataset/textful/{speaker}/text/*.txt',
        f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/*.txt',
    ):
        collected.extend(get_file_list(pattern))
    # One id per clip regardless of which of the three files exist.
    ids = sorted(set(os.path.splitext(os.path.basename(p))[0] for p in collected))
    data = {"voices": ids}
    return JSONResponse(content=jsonable_encoder(data))

View File

@ -1,15 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_dir_list
import os
# CreateはFileUploaderで実装。
def mod_get_speakers():
    """Enumerate speaker directories under the textful dataset root as a JSON response."""
    # Ensure the root exists so a fresh install returns an empty list, not an error.
    os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True)
    names = get_dir_list("MMVC_Trainer/dataset/textful/")
    payload = {"speakers": sorted(names)}
    return JSONResponse(content=jsonable_encoder(payload))

View File

@ -1,176 +0,0 @@
import subprocess,os
from restapi.utils.files import get_file_list
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
LOG_DIR = "info"

# Handle of the (single) running training subprocess, or None.
train_proc = None

SUCCESS = 0
ERROR = -1


### Submodule for Pre train
def sync_exec(cmd: str, log_path: str, cwd=None):
    """Run a shell command synchronously, sending stdout/stderr to log_path.

    Returns (SUCCESS, "success") on exit code 0, otherwise (ERROR, detail)
    where detail is the non-zero return code or the exception text.
    """
    shortCmdStr = cmd[:20]
    try:
        with open(log_path, 'w') as log_file:
            # subprocess.run treats cwd=None as "inherit the current directory",
            # so one call replaces the original duplicated if/else branches.
            proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd=cwd)
        print(f"{shortCmdStr} returncode:{proc.returncode}")
        if proc.returncode != 0:
            print(f"{shortCmdStr} exception:")
            return (ERROR, f"returncode:{proc.returncode}")
    except Exception as e:
        print(f"{shortCmdStr} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, "success")
def sync_exec_with_stdout(cmd: str, log_path: str, cwd: str = "MMVC_Trainer"):
    """Run a shell command synchronously and capture its stdout; stderr goes to log_path.

    Returns (SUCCESS, stdout) on completion or (ERROR, detail) on exception.
    `cwd` was previously hard-coded to "MMVC_Trainer"; it is now a parameter
    with that same default, so existing callers are unaffected.
    """
    shortCmdStr = cmd[:20]
    try:
        with open(log_path, 'w') as log_file:
            proc = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE,
                                  stderr=log_file, cwd=cwd)
        print(f"STDOUT{shortCmdStr}", proc.stdout)
    except Exception as e:
        print(f"{shortCmdStr} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, proc.stdout)
def create_dataset():
    """Run the jtalk dataset preprocessing script inside the trainer checkout."""
    cmd = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt"
    return sync_exec(cmd, os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt"), "MMVC_Trainer")


def set_batch_size(batch: int):
    """Patch "batch_size" in the trainer base config in place via sed."""
    cmd = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json"
    return sync_exec(cmd, os.path.join(LOG_DIR, "log_set_batch_size.txt"))


def set_dummy_device_count():
    """Rewrite train_ms.py so torch.cuda.device_count() is replaced by the literal 1."""
    cmd = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py'
    return sync_exec(cmd, os.path.join(LOG_DIR, "log_set_dummy_device_count.txt"))
### Submodule for Train
def exec_training(enable_finetuning: bool, GModel: str, DModel: str):
    """Start train_ms.py as a background subprocess, optionally fine-tuning.

    Refuses to start when a previous training process is still alive
    (double-launch guard). Returns (SUCCESS, "success") or (ERROR, detail).
    NOTE(review): the check `enable_finetuning == True` is only true for the
    bool True; mod_post_start_training passes a str, so fine-tuning may never
    trigger via that route — confirm intended caller types.
    """
    global train_proc
    log_file = os.path.join(LOG_DIR, "training.txt")
    # Confirm training state before starting (avoid a double launch).
    if train_proc != None:
        status = train_proc.poll()
        if status != None:
            # Previous run already finished; clear the stale handle.
            print("Training have ended.", status)
            train_proc = None
        else:
            # Still running: do not start a second trainer.
            print("Training have stated.")
            return (ERROR, "Training have started")
    try:
        # NOTE(review): `log_file` is rebound from the path string to the open
        # file handle here — confirm this shadowing is intentional.
        with open(log_file, 'w') as log_file:
            if enable_finetuning == True:
                GModelPath = os.path.join("logs", GModel)  # cwd is set on launch, so the "logs" folder prefix is enough.
                DModelPath = os.path.join("logs", DModel)
                cmd = f'python3 train_ms.py -c configs/train_config.json -m ./ -fg {GModelPath} -fd {DModelPath}'
            else:
                cmd = 'python3 train_ms.py -c configs/train_config.json -m ./'
            print("exec:",cmd)
            # "exec " makes the shell replace itself, so kill() hits the trainer directly.
            train_proc = subprocess.Popen("exec "+cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer")
            print("Training stated")
            # NOTE(review): returncode is None immediately after Popen; this
            # print always shows None — confirm it is intended as a liveness marker.
            print(f"returncode:{train_proc.returncode}")
    except Exception as e:
        print("start training exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, "success")
def stop_training():
    """Kill the running training subprocess, if any.

    Returns (SUCCESS, "success") when a live process was killed, otherwise
    (ERROR, detail) when no process was started or it already ended.
    """
    global train_proc
    if train_proc is None:
        print("Training have not stated.")
        return (ERROR, "Training have not stated.")
    status = train_proc.poll()
    if status is not None:
        print("Training have already ended.", status)
        train_proc = None
        # BUG FIX: `status` is the int return code; the original concatenated
        # it directly to a str, raising TypeError on this path.
        return (ERROR, "Training have already ended. " + str(status))
    else:
        train_proc.kill()
        print("Training have stoped.")
        return (SUCCESS, "success")
### Main
def mod_post_pre_training(batch: int):
    """Run the pre-training pipeline, stopping at the first failing step."""
    steps = (
        (lambda: set_batch_size(batch), "Preprocess(set_batch_size) failed."),
        (set_dummy_device_count, "Preprocess(set_dummy_device_count) failed."),
        (create_dataset, "Preprocess failed(create_dataset)."),
    )
    res = (SUCCESS, "")
    for run, failure_msg in steps:
        res = run()
        if res[0] == ERROR:
            return {"result": "failed", "detail": f"{failure_msg} {res[1]}"}
    return {"result": "success", "detail": f"Preprocess succeeded. {res[1]}"}


def mod_post_start_training(enable_finetuning: str, GModel: str, DModel: str):
    """Kick off training, optionally fine-tuning from existing G/D checkpoints."""
    print("START_TRAINING:::::::", enable_finetuning, GModel, DModel)
    code, detail = exec_training(enable_finetuning, GModel, DModel)
    if code == ERROR:
        return {"result": "failed", "detail": f"Start training failed. {detail}"}
    return {"result": "success", "detail": f"Start training succeeded. {detail}"}


def mod_post_stop_training():
    """Stop the running training process and report the outcome."""
    code, detail = stop_training()
    if code == ERROR:
        return {"result": "failed", "detail": f"Stop training failed. {detail}"}
    return {"result": "success", "detail": f"Stop training succeeded. {detail}"}
### DEBUG
def mod_get_related_files():
    """Return name/size/content of training-related files for debugging.

    File bodies are only embedded when smaller than 1 MiB to keep the
    response payload bounded.
    """
    files = get_file_list(os.path.join(LOG_DIR, "*"))
    files.extend([
        "MMVC_Trainer/dataset/multi_speaker_correspondence.txt",
        "MMVC_Trainer/train_ms.py",
    ])
    files.extend(get_file_list("MMVC_Trainer/configs/*"))
    res = []
    for path in files:
        size = os.path.getsize(path)
        data = ""
        if size < 1024 * 1024:
            # NOTE(review): assumes every matched file is readable as text — confirm.
            with open(path, "r") as fh:
                data = fh.read()
        res.append({
            "name": path,
            "size": size,
            "data": data,
        })
    return JSONResponse(content=jsonable_encoder(res))
def mod_get_tail_training_log(num: int):
    """Return the last `num` lines of the training log.

    Carriage-return-overwritten progress output is stripped into /tmp/out
    first, so each log line reflects only its final state.
    """
    training_log_file = os.path.join(LOG_DIR, "training.txt")
    res = sync_exec(f"cat {training_log_file} | sed -e 's/.*\r//' > /tmp/out", "/dev/null")
    if res[0] == ERROR:
        # BUG FIX: the original ignored this result and went on to tail a
        # possibly stale or missing /tmp/out.
        return {"result": "failed", "detail": f"Tail training log failed. {res[1]}"}
    cmd = f'tail -n {num} /tmp/out'
    res = sync_exec_with_stdout(cmd, "/dev/null")
    if res[0] == ERROR:
        return {"result": "failed", "detail": f"Tail training log failed. {res[1]}"}
    return {"result": "success", "detail": res[1]}

View File

@ -1,126 +0,0 @@
from const import EnumEmbedderTypes, EnumInferenceTypes
import torch
import onnxruntime
import json
from data.ModelSlot import ModelSlot
def _setInfoByPytorch(slot: ModelSlot):
    """Fill `slot` metadata in place by inspecting a PyTorch RVC checkpoint.

    Loads the checkpoint on CPU and distinguishes official RVC checkpoints
    (config length 18) from DDPN/WebUI variants, setting modelType, f0,
    embChannels, embOutputLayer, useFinalProj, embedder and samplingRate.
    """
    cpt = torch.load(slot.modelFile, map_location="cpu")
    config_len = len(cpt["config"])
    if config_len == 18:
        # Original RVC
        slot.f0 = True if cpt["f0"] == 1 else False
        version = cpt.get("version", "v1")
        if version is None or version == "v1":
            slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
            slot.embChannels = 256
            slot.embOutputLayer = 9
            slot.useFinalProj = True
            slot.embedder = EnumEmbedderTypes.hubert.value
            print("[Voice Changer] Official Model(pyTorch) : v1")
        else:
            slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
            slot.embChannels = 768
            slot.embOutputLayer = 12
            slot.useFinalProj = False
            slot.embedder = EnumEmbedderTypes.hubert.value
            print("[Voice Changer] Official Model(pyTorch) : v2")
    else:
        # DDPN RVC
        slot.f0 = True if cpt["f0"] == 1 else False
        slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
        # NOTE(review): assumes config index 17 holds the embedding channel
        # count for DDPN checkpoints — confirm against the exporter.
        slot.embChannels = cpt["config"][17]
        slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
        if slot.embChannels == 256:
            slot.useFinalProj = True
        else:
            slot.useFinalProj = False
        # Report which known layout the DDPN model matches.
        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
            print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
            print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
        else:
            print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
        slot.embedder = cpt["embedder_name"]
        # Embedder names may carry a "768" dimension suffix; strip it.
        if slot.embedder.endswith("768"):
            slot.embedder = slot.embedder[:-3]
        # if slot.embedder == EnumEmbedderTypes.hubert.value:
        #     slot.embedder = EnumEmbedderTypes.hubert
        # elif slot.embedder == EnumEmbedderTypes.contentvec.value:
        #     slot.embedder = EnumEmbedderTypes.contentvec
        # elif slot.embedder == EnumEmbedderTypes.hubert_jp.value:
        #     slot.embedder = EnumEmbedderTypes.hubert_jp
        # else:
        #     raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
    # Sampling rate is the last config entry for both checkpoint layouts.
    slot.samplingRate = cpt["config"][-1]
    del cpt
def _setInfoByONNX(slot: ModelSlot):
    """Fill `slot` metadata in place from an ONNX model's embedded metadata.

    Opens a throwaway CPU inference session to read the custom metadata map.
    Any failure (missing/invalid metadata) falls back to conservative v1-like
    defaults and marks the slot deprecated.
    """
    tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
    modelmeta = tmp_onnx_session.get_modelmeta()
    try:
        metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
        # slot.modelType = metadata["modelType"]
        slot.embChannels = metadata["embChannels"]
        slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
        # NOTE(review): this metadata-derived value is immediately overwritten
        # by the if/else below — confirm whether the metadata flag should win.
        slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
        if slot.embChannels == 256:
            slot.useFinalProj = True
        else:
            slot.useFinalProj = False
        # Report which known layout the ONNX model matches.
        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
            print("[Voice Changer] ONNX Model: Official v1 like")
        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
            print("[Voice Changer] ONNX Model: Official v2 like")
        else:
            print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
        if "embedder" not in metadata:
            # Older exports carry no embedder name; default to hubert.
            slot.embedder = EnumEmbedderTypes.hubert.value
        else:
            slot.embedder = metadata["embedder"]
        # elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
        #     slot.embedder = EnumEmbedderTypes.hubert
        # elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value:
        #     slot.embedder = EnumEmbedderTypes.contentvec
        # elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value:
        #     slot.embedder = EnumEmbedderTypes.hubert_jp
        # else:
        #     raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
        slot.f0 = metadata["f0"]
        slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
        slot.samplingRate = metadata["samplingRate"]
        slot.deprecated = False
    except Exception as e:
        # Metadata unreadable: fall back to v1-like defaults and flag the slot.
        slot.modelType = EnumInferenceTypes.onnxRVC.value
        slot.embChannels = 256
        slot.embedder = EnumEmbedderTypes.hubert.value
        slot.f0 = True
        slot.samplingRate = 48000
        slot.deprecated = True
        print("[Voice Changer] setInfoByONNX", e)
        print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
        print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
        print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
    del tmp_onnx_session

View File

@ -20,14 +20,10 @@ if sys.platform.startswith("darwin"):
else: else:
sys.path.append("RVC") sys.path.append("RVC")
from voice_changer.RVC.ModelSlotGenerator import (
_setInfoByONNX,
_setInfoByPytorch,
)
from voice_changer.RVC.RVCSettings import RVCSettings from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.utils.LoadModelParams import LoadModelParams2 from voice_changer.utils.VoiceChangerModel import AudioInOut, VoiceChangerModel
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
@ -35,10 +31,10 @@ from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.pipeline.Pipeline import Pipeline from voice_changer.RVC.pipeline.Pipeline import Pipeline
from Exceptions import DeviceCannotSupportHalfPrecisionException, NoModeLoadedException from Exceptions import DeviceCannotSupportHalfPrecisionException
class RVC: class RVC(VoiceChangerModel):
initialLoad: bool = True initialLoad: bool = True
settings: RVCSettings = RVCSettings() settings: RVCSettings = RVCSettings()
@ -53,7 +49,7 @@ class RVC:
needSwitch: bool = False needSwitch: bool = False
def __init__(self, params: VoiceChangerParams, slotInfo: RVCModelSlot): def __init__(self, params: VoiceChangerParams, slotInfo: RVCModelSlot):
print("[Voice Changer][RVC] Creating instance ") print("[Voice Changer] [RVC] Creating instance ")
EmbedderManager.initialize(params) EmbedderManager.initialize(params)
self.params = params self.params = params
@ -64,38 +60,16 @@ class RVC:
self.initialize() self.initialize()
def initialize(self): def initialize(self):
print("[Voice Changer][RVC] Initializing... ") print("[Voice Changer] [RVC] Initializing... ")
# pipelineの生成 # pipelineの生成
self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector) self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
# その他の設定 # その他の設定
self.trans = self.slotInfo.defaultTune self.settings.tran = self.slotInfo.defaultTune
self.index_ratio = self.slotInfo.defaultIndexRatio self.settings.indexRatio = self.slotInfo.defaultIndexRatio
self.protect = self.slotInfo.defaultProtect self.settings.protect = self.slotInfo.defaultProtect
self.samplingRate = self.slotInfo.samplingRate print("[Voice Changer] [RVC] Initializing... done")
print("[Voice Changer][RVC] Initializing... done")
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: RVCModelSlot = RVCModelSlot()
for file in props.files:
if file.kind == "rvcModel":
slotInfo.modelFile = file.name
elif file.kind == "rvcIndex":
slotInfo.indexFile = file.name
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 0
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
# slotInfo.iconFile = "/assets/icons/noimage.png"
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
return slotInfo
def update_settings(self, key: str, val: int | float | str): def update_settings(self, key: str, val: int | float | str):
print("[Voice Changer][RVC]: update_settings", key, val) print("[Voice Changer][RVC]: update_settings", key, val)
@ -123,7 +97,7 @@ class RVC:
return data return data
def get_processing_sampling_rate(self): def get_processing_sampling_rate(self):
return self.settings.modelSamplingRate return self.slotInfo.samplingRate
def generate_input( def generate_input(
self, self,
@ -170,14 +144,6 @@ class RVC:
return (audio_buffer, convertSize, vol) return (audio_buffer, convertSize, vol)
def inference(self, data): def inference(self, data):
# if self.settings.modelSlotIndex < 0:
# print(
# "[Voice Changer] wait for loading model...",
# self.settings.modelSlotIndex,
# self.currentSlot,
# )
# raise NoModeLoadedException("model_common")
audio = data[0] audio = data[0]
convertSize = data[1] convertSize = data[1]
vol = data[2] vol = data[2]
@ -185,17 +151,13 @@ class RVC:
if vol < self.settings.silentThreshold: if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16) return np.zeros(convertSize).astype(np.int16)
audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99) audio = torchaudio.functional.resample(audio, self.slotInfo.samplingRate, 16000, rolloff=0.99)
repeat = 1 if self.settings.rvcQuality else 0 repeat = 1 if self.settings.rvcQuality else 0
sid = 0 sid = 0
f0_up_key = self.settings.tran f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio index_rate = self.settings.indexRatio
protect = self.settings.protect protect = self.settings.protect
# if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
# embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer
# useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
if_f0 = 1 if self.slotInfo.f0 else 0 if_f0 = 1 if self.slotInfo.f0 else 0
embOutputLayer = self.slotInfo.embOutputLayer embOutputLayer = self.slotInfo.embOutputLayer
useFinalProj = self.slotInfo.useFinalProj useFinalProj = self.slotInfo.useFinalProj
@ -207,7 +169,7 @@ class RVC:
f0_up_key, f0_up_key,
index_rate, index_rate,
if_f0, if_f0,
self.settings.extraConvertSize / self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。 self.settings.extraConvertSize / self.slotInfo.samplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
embOutputLayer, embOutputLayer,
useFinalProj, useFinalProj,
repeat, repeat,
@ -292,36 +254,6 @@ class RVC:
# self.settings.modelSlotIndex = targetSlot # self.settings.modelSlotIndex = targetSlot
# self.currentSlot = self.settings.modelSlotIndex # self.currentSlot = self.settings.modelSlotIndex
# def update_model_default(self):
# # {"slot":9,"key":"name","val":"dogsdododg"}
# self.modelSlotManager.update_model_info(
# json.dumps(
# {
# "slot": self.currentSlot,
# "key": "defaultTune",
# "val": self.settings.tran,
# }
# )
# )
# self.modelSlotManager.update_model_info(
# json.dumps(
# {
# "slot": self.currentSlot,
# "key": "defaultIndexRatio",
# "val": self.settings.indexRatio,
# }
# )
# )
# self.modelSlotManager.update_model_info(
# json.dumps(
# {
# "slot": self.currentSlot,
# "key": "defaultProtect",
# "val": self.settings.protect,
# }
# )
# )
def get_model_current(self): def get_model_current(self):
return [ return [
{ {
@ -337,9 +269,3 @@ class RVC:
"val": self.settings.protect, "val": self.settings.protect,
}, },
] ]
# def update_model_info(self, newData: str):
# self.modelSlotManager.update_model_info(newData)
# def upload_model_assets(self, params: str):
# self.modelSlotManager.store_model_assets(params)

View File

@ -0,0 +1,152 @@
import os
from const import EnumEmbedderTypes, EnumInferenceTypes
import torch
import onnxruntime
import json
from data.ModelSlot import ModelSlot, RVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
class RVCModelSlotGenerator(ModelSlotGenerator):
@classmethod
def loadModel(cls, props: LoadModelParams):
    """Build an RVCModelSlot from an upload request.

    Picks the model/index file names out of props.files by kind, applies
    default tuning values, then dispatches to the ONNX or PyTorch metadata
    reader based on the model file extension. Returns the populated slot.
    """
    slotInfo: RVCModelSlot = RVCModelSlot()
    for file in props.files:
        if file.kind == "rvcModel":
            slotInfo.modelFile = file.name
        elif file.kind == "rvcIndex":
            slotInfo.indexFile = file.name
    # Defaults applied to every freshly uploaded model.
    slotInfo.defaultTune = 0
    slotInfo.defaultIndexRatio = 0
    slotInfo.defaultProtect = 0.5
    # NOTE(review): assumes props.files contains an "rvcModel" entry;
    # otherwise modelFile is the RVCModelSlot default — confirm callers.
    slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
    slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
    # slotInfo.iconFile = "/assets/icons/noimage.png"
    if slotInfo.isONNX:
        cls._setInfoByONNX(slotInfo)
    else:
        cls._setInfoByPytorch(slotInfo)
    return slotInfo
@classmethod
def _setInfoByPytorch(cls, slot: ModelSlot):
    """Fill `slot` metadata in place by inspecting a PyTorch RVC checkpoint.

    Loads the checkpoint on CPU and distinguishes official RVC checkpoints
    (config length 18) from DDPN/WebUI variants, setting modelType, f0,
    embChannels, embOutputLayer, useFinalProj, embedder and samplingRate.
    """
    cpt = torch.load(slot.modelFile, map_location="cpu")
    config_len = len(cpt["config"])
    if config_len == 18:
        # Original RVC
        slot.f0 = True if cpt["f0"] == 1 else False
        version = cpt.get("version", "v1")
        if version is None or version == "v1":
            slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
            slot.embChannels = 256
            slot.embOutputLayer = 9
            slot.useFinalProj = True
            slot.embedder = EnumEmbedderTypes.hubert.value
            print("[Voice Changer] Official Model(pyTorch) : v1")
        else:
            slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
            slot.embChannels = 768
            slot.embOutputLayer = 12
            slot.useFinalProj = False
            slot.embedder = EnumEmbedderTypes.hubert.value
            print("[Voice Changer] Official Model(pyTorch) : v2")
    else:
        # DDPN RVC
        slot.f0 = True if cpt["f0"] == 1 else False
        slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
        # NOTE(review): assumes config index 17 holds the embedding channel
        # count for DDPN checkpoints — confirm against the exporter.
        slot.embChannels = cpt["config"][17]
        slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
        if slot.embChannels == 256:
            slot.useFinalProj = True
        else:
            slot.useFinalProj = False
        # Report which known layout the DDPN model matches.
        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
            print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
            print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
        else:
            print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
        slot.embedder = cpt["embedder_name"]
        # Embedder names may carry a "768" dimension suffix; strip it.
        if slot.embedder.endswith("768"):
            slot.embedder = slot.embedder[:-3]
        # if slot.embedder == EnumEmbedderTypes.hubert.value:
        #     slot.embedder = EnumEmbedderTypes.hubert
        # elif slot.embedder == EnumEmbedderTypes.contentvec.value:
        #     slot.embedder = EnumEmbedderTypes.contentvec
        # elif slot.embedder == EnumEmbedderTypes.hubert_jp.value:
        #     slot.embedder = EnumEmbedderTypes.hubert_jp
        # else:
        #     raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
    # Sampling rate is the last config entry for both checkpoint layouts.
    slot.samplingRate = cpt["config"][-1]
    del cpt
@classmethod
def _setInfoByONNX(cls, slot: ModelSlot):
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
modelmeta = tmp_onnx_session.get_modelmeta()
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# ONNXモデルの情報を表示
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] ONNX Model: Official v1 like")
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] ONNX Model: Official v2 like")
else:
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert.value
else:
slot.embedder = metadata["embedder"]
# elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
# slot.embedder = EnumEmbedderTypes.hubert
# elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value:
# slot.embedder = EnumEmbedderTypes.contentvec
# elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value:
# slot.embedder = EnumEmbedderTypes.hubert_jp
# else:
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"]
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False
except Exception as e:
slot.modelType = EnumInferenceTypes.onnxRVC.value
slot.embChannels = 256
slot.embedder = EnumEmbedderTypes.hubert.value
slot.f0 = True
slot.samplingRate = 48000
slot.deprecated = True
print("[Voice Changer] setInfoByONNX", e)
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
del tmp_onnx_session

View File

@ -7,12 +7,9 @@ class RVCSettings:
dstId: int = 0 dstId: int = 0
f0Detector: str = "harvest" # dio or harvest f0Detector: str = "harvest" # dio or harvest
tran: int = 20 tran: int = 12
silentThreshold: float = 0.00001 silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 32 extraConvertSize: int = 1024 * 4
clusterInferRatio: float = 0.1
framework: str = "PyTorch" # PyTorch or ONNX
indexRatio: float = 0 indexRatio: float = 0
protect: float = 0.5 protect: float = 0.5
@ -21,8 +18,8 @@ class RVCSettings:
modelSamplingRate: int = 48000 modelSamplingRate: int = 48000
speakers: dict[str, int] = field(default_factory=lambda: {}) speakers: dict[str, int] = field(default_factory=lambda: {})
isHalf: int = 1 # 0:off, 1:on # isHalf: int = 1 # 0:off, 1:on
enableDirectML: int = 0 # 0:off, 1:on # enableDirectML: int = 0 # 0:off, 1:on
# ↓mutableな物だけ列挙 # ↓mutableな物だけ列挙
intData = [ intData = [
"gpu", "gpu",
@ -30,11 +27,7 @@ class RVCSettings:
"tran", "tran",
"extraConvertSize", "extraConvertSize",
"rvcQuality", "rvcQuality",
"modelSamplingRate",
"silenceFront", "silenceFront",
"modelSlotIndex",
"isHalf",
"enableDirectML",
] ]
floatData = ["silentThreshold", "indexRatio", "protect"] floatData = ["silentThreshold", "indexRatio", "protect"]
strData = ["framework", "f0Detector"] strData = ["f0Detector"]

View File

@ -7,7 +7,7 @@ from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
from voice_changer.ModelSlotManager import ModelSlotManager from voice_changer.ModelSlotManager import ModelSlotManager
from voice_changer.VoiceChanger import VoiceChanger from voice_changer.VoiceChanger import VoiceChanger
from const import UPLOAD_DIR, ModelType from const import UPLOAD_DIR, ModelType
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams, LoadModelParams2 from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.VoiceChangerModel import AudioInOut from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from dataclasses import dataclass, asdict, field from dataclasses import dataclass, asdict, field
@ -85,20 +85,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
cls._instance.voiceChanger = VoiceChanger(params) cls._instance.voiceChanger = VoiceChanger(params)
return cls._instance return cls._instance
def loadModel(self, props: LoadModelParams): def loadModel(self, params: LoadModelParams):
paramDict = props.params if params.isSampleMode:
if paramDict["sampleId"] is not None:
# サンプルダウンロード # サンプルダウンロード
downloadSample(self.params.sample_mode, paramDict["sampleId"], self.params.model_dir, props.slot, {"useIndex": paramDict["rvcIndexDownload"]}) downloadSample(self.params.sample_mode, params.sampleId, self.params.model_dir, params.slot, {"useIndex": params.params["rvcIndexDownload"]})
self.modelSlotManager.getAllSlotInfo(reload=True) self.modelSlotManager.getAllSlotInfo(reload=True)
info = {"status": "OK"} info = {"status": "OK"}
return info return info
elif paramDict["voiceChangerType"]: else:
# 新しいアップローダ # アップローダ
# Dataを展開
params = LoadModelParams2(**paramDict)
params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
# ファイルをslotにコピー # ファイルをslotにコピー
for file in params.files: for file in params.files:
print("FILE", file) print("FILE", file)
@ -116,42 +111,32 @@ class VoiceChangerManager(ServerDeviceCallbacks):
# メタデータ作成(各VCで定義) # メタデータ作成(各VCで定義)
if params.voiceChangerType == "RVC": if params.voiceChangerType == "RVC":
from voice_changer.RVC.RVC import RVC # 起動時にインポートするとパラメータが取れない。 from voice_changer.RVC.RVCModelSlotGenerator import RVCModelSlotGenerator # 起動時にインポートするとパラメータが取れない。
slotInfo = RVC.loadModel2(params) slotInfo = RVCModelSlotGenerator.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo) self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv13": elif params.voiceChangerType == "MMVCv13":
from voice_changer.MMVCv13.MMVCv13 import MMVCv13 from voice_changer.MMVCv13.MMVCv13 import MMVCv13
slotInfo = MMVCv13.loadModel2(params) slotInfo = MMVCv13.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo) self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv15": elif params.voiceChangerType == "MMVCv15":
from voice_changer.MMVCv15.MMVCv15 import MMVCv15 from voice_changer.MMVCv15.MMVCv15 import MMVCv15
slotInfo = MMVCv15.loadModel2(params) slotInfo = MMVCv15.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo) self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "so-vits-svc-40": elif params.voiceChangerType == "so-vits-svc-40":
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40 from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
slotInfo = SoVitsSvc40.loadModel2(params) slotInfo = SoVitsSvc40.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo) self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "DDSP-SVC": elif params.voiceChangerType == "DDSP-SVC":
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
slotInfo = DDSP_SVC.loadModel2(params) slotInfo = DDSP_SVC.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo) self.modelSlotManager.save_model_slot(params.slot, slotInfo)
print("params", params) print("params", params)
else:
# 古いアップローダ
print("[Voice Canger]: upload models........")
info = self.voiceChanger.loadModel(props)
if hasattr(info, "status") and info["status"] == "NG":
return info
else:
info["status"] = "OK"
return info
def get_info(self): def get_info(self):
data = asdict(self.settings) data = asdict(self.settings)
data["gpus"] = self.gpus data["gpus"] = self.gpus

View File

@ -1,27 +1,8 @@
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any
from const import VoiceChangerType from const import VoiceChangerType
from typing import Literal, TypeAlias from typing import Literal, TypeAlias
@dataclass
class FilePaths:
configFilename: str | None
pyTorchModelFilename: str | None
onnxModelFilename: str | None
clusterTorchModelFilename: str | None
featureFilename: str | None
indexFilename: str | None
@dataclass
class LoadModelParams:
slot: int
isHalf: bool
params: Any
LoadModelParamFileKind: TypeAlias = Literal[ LoadModelParamFileKind: TypeAlias = Literal[
"mmvcv13Config", "mmvcv13Config",
"mmvcv13Model", "mmvcv13Model",
@ -47,9 +28,10 @@ class LoadModelParamFile:
@dataclass @dataclass
class LoadModelParams2: class LoadModelParams:
voiceChangerType: VoiceChangerType voiceChangerType: VoiceChangerType
slot: int slot: int
isSampleMode: bool isSampleMode: bool
sampleId: str sampleId: str
files: list[LoadModelParamFile] files: list[LoadModelParamFile]
params: dict

View File

@ -0,0 +1,9 @@
from typing import Protocol
from voice_changer.utils.LoadModelParams import LoadModelParams
class ModelSlotGenerator(Protocol):
@classmethod
def loadModel(self, params: LoadModelParams):
...

View File

@ -21,10 +21,8 @@ class VoiceChangerModel(Protocol):
def inference(self, data: tuple[Any, ...]) -> Any: def inference(self, data: tuple[Any, ...]) -> Any:
... ...
def generate_input( def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int, solaSearchFrame: int) -> tuple[Any, ...]:
self, newData: AudioInOut, inputSize: int, crossfadeSize: int
) -> tuple[Any, ...]:
... ...
def update_settings(self, key: str, val: Any) -> bool: def update_settings(self, key: str, val: int | float | str) -> bool:
... ...