mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-09 03:37:51 +03:00
wip:support m1 mac 1
This commit is contained in:
parent
06a5250f61
commit
e6b191abd2
4
client/demo/dist/index.js
vendored
4
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
|||||||
import React, { useMemo, useState } from "react";
|
import React, { useMemo, useState } from "react";
|
||||||
import { useAppState } from "../../001_provider/001_AppStateProvider";
|
import { useAppState } from "../../001_provider/001_AppStateProvider";
|
||||||
import { InitialFileUploadSetting } from "@dannadori/voice-changer-client-js";
|
import { ModelUploadSetting } from "@dannadori/voice-changer-client-js";
|
||||||
import { useMessageBuilder } from "../../hooks/useMessageBuilder";
|
import { useMessageBuilder } from "../../hooks/useMessageBuilder";
|
||||||
import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
|
import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
|
||||||
|
|
||||||
@ -51,15 +51,18 @@ export const SampleDownloaderScreen = (props: SampleDownloaderScreenProps) => {
|
|||||||
)
|
)
|
||||||
|
|
||||||
const onDownloadSampleClicked = async (id: string) => {
|
const onDownloadSampleClicked = async (id: string) => {
|
||||||
serverSetting.fileUploadSettings[props.targetIndex] = {
|
const uploadParams: ModelUploadSetting = {
|
||||||
...InitialFileUploadSetting,
|
voiceChangerType: "RVC",
|
||||||
rvcModel: null,
|
slot: props.targetIndex,
|
||||||
rvcIndex: null,
|
isSampleMode: true,
|
||||||
sampleId: id,
|
sampleId: id,
|
||||||
isSampleMode: true
|
files: [],
|
||||||
|
params: {
|
||||||
|
rvcIndexDownload: true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
await serverSetting.loadModel(props.targetIndex)
|
await serverSetting.uploadModel(uploadParams)
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
alert(e)
|
alert(e)
|
||||||
}
|
}
|
||||||
|
@ -41,6 +41,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
|
|||||||
isSampleMode: false,
|
isSampleMode: false,
|
||||||
sampleId: null,
|
sampleId: null,
|
||||||
files: [],
|
files: [],
|
||||||
|
params: {}
|
||||||
})
|
})
|
||||||
}, [props.targetIndex, voiceChangerType])
|
}, [props.targetIndex, voiceChangerType])
|
||||||
|
|
||||||
|
@ -51,6 +51,7 @@ export type ModelUploadSetting = {
|
|||||||
sampleId: string | null
|
sampleId: string | null
|
||||||
|
|
||||||
files: ModelFile[]
|
files: ModelFile[]
|
||||||
|
params: any
|
||||||
}
|
}
|
||||||
export type ModelFileForServer = Omit<ModelFile, "file"> & {
|
export type ModelFileForServer = Omit<ModelFile, "file"> & {
|
||||||
name: string,
|
name: string,
|
||||||
@ -341,7 +342,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
}
|
}
|
||||||
}, [props.voiceChangerClient])
|
}, [props.voiceChangerClient])
|
||||||
|
|
||||||
|
|
||||||
// 古いアップローダ(新GUIへ以降まで、当分残しておく。)
|
// 古いアップローダ(新GUIへ以降まで、当分残しておく。)
|
||||||
const loadModel = useMemo(() => {
|
const loadModel = useMemo(() => {
|
||||||
return async (slot: number) => {
|
return async (slot: number) => {
|
||||||
|
@ -7,7 +7,7 @@ from const import RVCSampleMode, getSampleJsonAndModelIds
|
|||||||
from data.ModelSample import ModelSamples, generateModelSample
|
from data.ModelSample import ModelSamples, generateModelSample
|
||||||
from data.ModelSlot import RVCModelSlot
|
from data.ModelSlot import RVCModelSlot
|
||||||
from voice_changer.ModelSlotManager import ModelSlotManager
|
from voice_changer.ModelSlotManager import ModelSlotManager
|
||||||
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
from voice_changer.RVC.RVCModelSlotGenerator import RVCModelSlotGenerator
|
||||||
from downloader.Downloader import download, download_no_tqdm
|
from downloader.Downloader import download, download_no_tqdm
|
||||||
|
|
||||||
|
|
||||||
@ -166,7 +166,7 @@ def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str
|
|||||||
slotInfo = modelSlotManager.get_slot_info(targetSlotIndex)
|
slotInfo = modelSlotManager.get_slot_info(targetSlotIndex)
|
||||||
if slotInfo.voiceChangerType == "RVC":
|
if slotInfo.voiceChangerType == "RVC":
|
||||||
if slotInfo.isONNX:
|
if slotInfo.isONNX:
|
||||||
_setInfoByONNX(slotInfo)
|
RVCModelSlotGenerator._setInfoByONNX(slotInfo)
|
||||||
else:
|
else:
|
||||||
_setInfoByPytorch(slotInfo)
|
RVCModelSlotGenerator._setInfoByPytorch(slotInfo)
|
||||||
modelSlotManager.save_model_slot(targetSlotIndex, slotInfo)
|
modelSlotManager.save_model_slot(targetSlotIndex, slotInfo)
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
from typing import Union
|
from typing import Union
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
from fastapi.encoders import jsonable_encoder
|
from fastapi.encoders import jsonable_encoder
|
||||||
@ -12,7 +11,7 @@ from restapi.mods.FileUploader import upload_file, concat_file_chunks
|
|||||||
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
||||||
|
|
||||||
from const import MODEL_DIR, UPLOAD_DIR, ModelType
|
from const import MODEL_DIR, UPLOAD_DIR, ModelType
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams
|
||||||
|
|
||||||
|
|
||||||
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
||||||
@ -88,26 +87,11 @@ class MMVC_Rest_Fileuploader:
|
|||||||
try:
|
try:
|
||||||
paramDict = json.loads(params)
|
paramDict = json.loads(params)
|
||||||
print("paramDict", paramDict)
|
print("paramDict", paramDict)
|
||||||
if paramDict["voiceChangerType"]:
|
loadModelparams = LoadModelParams(**paramDict)
|
||||||
# 新しいアップローダ用
|
loadModelparams.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
|
||||||
print("NEW UPLOADER")
|
# print("paramDict", loadModelparams)
|
||||||
props: LoadModelParams = LoadModelParams(slot=slot, isHalf=isHalf, params=paramDict)
|
|
||||||
else:
|
|
||||||
# 古いアップローダ用
|
|
||||||
# Change Filepath
|
|
||||||
newFilesDict = {}
|
|
||||||
for key, val in paramDict["files"].items():
|
|
||||||
if val != "-" and val != "":
|
|
||||||
uploadPath = os.path.join(UPLOAD_DIR, val)
|
|
||||||
storePath = os.path.join(UPLOAD_DIR, f"{slot}", val)
|
|
||||||
storeDir = os.path.dirname(storePath)
|
|
||||||
os.makedirs(storeDir, exist_ok=True)
|
|
||||||
shutil.move(uploadPath, storePath)
|
|
||||||
newFilesDict[key] = storePath
|
|
||||||
paramDict["files"] = newFilesDict
|
|
||||||
props = LoadModelParams(slot=slot, isHalf=isHalf, params=paramDict)
|
|
||||||
|
|
||||||
info = self.voiceChangerManager.loadModel(props)
|
info = self.voiceChangerManager.loadModel(loadModelparams)
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -1,14 +0,0 @@
|
|||||||
|
|
||||||
from fastapi.responses import FileResponse
|
|
||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
def mod_get_model(modelFile: str):
    """Return a model file stored under ``MMVC_Trainer/logs``.

    Args:
        modelFile: Name of the file, relative to ``MMVC_Trainer/logs``.

    Returns:
        A ``FileResponse`` pointing at the requested file.

    Raises:
        ValueError: If ``modelFile`` resolves to a location outside
            ``MMVC_Trainer/logs`` (e.g. ``"../../etc/passwd"``).
    """
    baseDir = "MMVC_Trainer/logs"
    modelPath = os.path.join(baseDir, modelFile)

    # NOTE(review): modelFile is presumably supplied by an API client, so make
    # sure the joined path cannot escape the logs directory.
    realBase = os.path.realpath(baseDir)
    if os.path.commonpath([realBase, os.path.realpath(modelPath)]) != realBase:
        raise ValueError(f"Invalid model file name: {modelFile}")

    return FileResponse(path=modelPath)
|
|
||||||
|
|
||||||
|
|
||||||
def mod_delete_model(modelFile: str):
    """Delete a model file stored under ``MMVC_Trainer/logs``.

    Args:
        modelFile: Name of the file, relative to ``MMVC_Trainer/logs``.

    Returns:
        A dict ``{"Model deleted": modelFile}``.

    Raises:
        ValueError: If ``modelFile`` resolves to a location outside
            ``MMVC_Trainer/logs``; nothing is deleted in that case.
        OSError: Propagated from ``os.remove`` (e.g. the file does not exist).
    """
    baseDir = "MMVC_Trainer/logs"
    modelPath = os.path.join(baseDir, modelFile)

    # NOTE(review): modelFile is presumably supplied by an API client; without
    # this check "../../x" would let the caller remove arbitrary files.
    realBase = os.path.realpath(baseDir)
    if os.path.commonpath([realBase, os.path.realpath(modelPath)]) != realBase:
        raise ValueError(f"Invalid model file name: {modelFile}")

    os.remove(modelPath)
    return {"Model deleted": f"{modelFile}"}
|
|
@ -1,23 +0,0 @@
|
|||||||
|
|
||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
from restapi.utils.files import get_file_list
|
|
||||||
import os
|
|
||||||
|
|
||||||
def mod_get_models():
    """List the base names of ``G*.pth``, ``D*.pth`` and ``config.json`` in ``MMVC_Trainer/logs``.

    Returns:
        A ``JSONResponse`` whose body is ``{"models": [...]}`` with the names sorted.
    """
    patterns = (
        'MMVC_Trainer/logs/G*.pth',
        'MMVC_Trainer/logs/D*.pth',
        'MMVC_Trainer/logs/config.json',
    )
    found = []
    for pattern in patterns:
        found.extend(get_file_list(pattern))

    names = sorted(os.path.basename(path) for path in found)
    return JSONResponse(content=jsonable_encoder({"models": names}))
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
import os
|
|
||||||
|
|
||||||
MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt"
|
|
||||||
def mod_get_multi_speaker_setting():
    """Return the content of the multi-speaker setting file.

    When the file at ``MULTI_SPEAKER_SETTING_PATH`` does not exist it is first
    created empty, so the read below always succeeds.

    Returns:
        A ``JSONResponse`` whose body is ``{"multi_speaker_setting": <file text>}``.
    """
    if not os.path.isfile(MULTI_SPEAKER_SETTING_PATH):
        # The ``with`` block flushes and closes the new file on exit.
        with open(MULTI_SPEAKER_SETTING_PATH, "w") as newFile:
            newFile.write("")

    with open(MULTI_SPEAKER_SETTING_PATH, "r") as settingFile:
        content = settingFile.read()

    payload = {"multi_speaker_setting": content}
    return JSONResponse(content=jsonable_encoder(payload))
|
|
||||||
|
|
||||||
|
|
||||||
def mod_post_multi_speaker_setting(setting: str):
    """Overwrite the multi-speaker setting file with ``setting``.

    Args:
        setting: New full content of the file at ``MULTI_SPEAKER_SETTING_PATH``.

    Returns:
        A dict echoing the written text under ``"Write Multispeaker setting"``.
    """
    # Leaving the ``with`` block flushes and closes the file.
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as settingFile:
        settingFile.write(setting)
    return {"Write Multispeaker setting": f"{setting}"}
|
|
@ -1,15 +0,0 @@
|
|||||||
import shutil
|
|
||||||
from restapi.mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH
|
|
||||||
|
|
||||||
def mod_delete_speaker(speaker: str):
    """Remove a speaker's dataset directory and its multi-speaker setting lines.

    Args:
        speaker: Directory name under ``MMVC_Trainer/dataset/textful``.

    Returns:
        A dict ``{"Speaker deleted": speaker}``.
    """
    shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}")

    with open(MULTI_SPEAKER_SETTING_PATH, "r") as settingFile:
        lines = settingFile.readlines()

    # Keep every line that does not start with "<speaker>|".
    prefix = f"{speaker}|"
    remaining = [line for line in lines if not line.startswith(prefix)]

    with open(MULTI_SPEAKER_SETTING_PATH, "w") as settingFile:
        settingFile.writelines(remaining)
    return {"Speaker deleted": f"{speaker}"}
|
|
@ -1,28 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
import os, base64
|
|
||||||
|
|
||||||
def mod_get_speaker_voice(speaker: str, voice: str):
    """Return the wav, text and readable text stored for one voice of a speaker.

    Args:
        speaker: Directory name under ``MMVC_Trainer/dataset/textful``.
        voice: File stem shared by the wav and txt files.

    Returns:
        A ``JSONResponse`` with up to three keys: ``"wav"`` (base64 text of the
        wav bytes), ``"text"`` and ``"readable_text"``. A key is omitted when
        its file does not exist.
    """
    wavPath = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav'
    textSources = (
        ("text", f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt'),
        ("readable_text", f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt'),
    )

    payload = {}
    if os.path.exists(wavPath):
        with open(wavPath, "rb") as wavFile:
            payload["wav"] = base64.b64encode(wavFile.read()).decode('utf-8')

    for key, path in textSources:
        if os.path.exists(path):
            with open(path, "r") as textFile:
                payload[key] = textFile.read()

    return JSONResponse(content=jsonable_encoder(payload))
|
|
@ -1,22 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
from restapi.utils.files import get_file_list
|
|
||||||
import os
|
|
||||||
|
|
||||||
def mod_get_speaker_voices(speaker: str):
    """List the voice names available for a speaker.

    A name is the file stem of any wav, text or readable-text file found for
    the speaker; duplicates across the three folders are collapsed.

    Args:
        speaker: Directory name under ``MMVC_Trainer/dataset/textful``.

    Returns:
        A ``JSONResponse`` whose body is ``{"voices": [...]}`` with names sorted.
    """
    patterns = (
        f'MMVC_Trainer/dataset/textful/{speaker}/wav/*.wav',
        f'MMVC_Trainer/dataset/textful/{speaker}/text/*.txt',
        f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/*.txt',
    )

    stems = set()
    for pattern in patterns:
        for path in get_file_list(pattern):
            stems.add(os.path.splitext(os.path.basename(path))[0])

    return JSONResponse(content=jsonable_encoder({"voices": sorted(stems)}))
|
|
@ -1,15 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
from restapi.utils.files import get_dir_list
|
|
||||||
import os
|
|
||||||
# CreateはFileUploaderで実装。
|
|
||||||
|
|
||||||
def mod_get_speakers():
    """List the speaker directories under ``MMVC_Trainer/dataset/textful``.

    The dataset directory is created when missing, so listing never fails on a
    fresh installation.

    Returns:
        A ``JSONResponse`` whose body is ``{"speakers": [...]}`` with names sorted.
    """
    os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True)
    speakerDirs = get_dir_list("MMVC_Trainer/dataset/textful/")
    payload = {"speakers": sorted(speakerDirs)}
    return JSONResponse(content=jsonable_encoder(payload))
|
|
@ -1,176 +0,0 @@
|
|||||||
import subprocess,os
|
|
||||||
from restapi.utils.files import get_file_list
|
|
||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
|
|
||||||
LOG_DIR = "info"
|
|
||||||
train_proc = None
|
|
||||||
|
|
||||||
SUCCESS = 0
|
|
||||||
ERROR = -1
|
|
||||||
### Submodule for Pre train
|
|
||||||
def sync_exec(cmd: str, log_path: str, cwd=None):
    """Run a shell command synchronously, sending stdout and stderr to a log file.

    Args:
        cmd: Shell command line (executed with ``shell=True``).
        log_path: File that receives the command's stdout and stderr.
        cwd: Working directory for the command; ``None`` keeps the current one.

    Returns:
        ``(SUCCESS, "success")`` when the command exits with 0, otherwise
        ``(ERROR, <detail>)`` where detail is the return code or the exception text.
    """
    label = cmd[:20]  # short prefix used in console messages
    try:
        with open(log_path, 'w') as sink:
            # subprocess.run treats cwd=None as "use the current directory".
            result = subprocess.run(cmd, shell=True, text=True, stdout=sink, stderr=sink, cwd=cwd)
        print(f"{label} returncode:{result.returncode}")
        if result.returncode != 0:
            print(f"{label} exception:")
            return (ERROR, f"returncode:{result.returncode}")
    except Exception as e:
        print(f"{label} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, "success")
|
|
||||||
|
|
||||||
def sync_exec_with_stdout(cmd: str, log_path: str):
    """Run a shell command in ``MMVC_Trainer`` and return its captured stdout.

    Args:
        cmd: Shell command line (executed with ``shell=True``).
        log_path: File that receives the command's stderr.

    Returns:
        ``(SUCCESS, <stdout text>)`` when the command could be run, or
        ``(ERROR, <exception text>)`` when running it raised. The command's
        return code is not inspected.
    """
    label = cmd[:20]  # short prefix used in console messages
    try:
        with open(log_path, 'w') as errSink:
            result = subprocess.run(
                cmd,
                shell=True,
                text=True,
                stdout=subprocess.PIPE,
                stderr=errSink,
                cwd="MMVC_Trainer",
            )
        print(f"STDOUT{label}", result.stdout)
    except Exception as e:
        print(f"{label} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, result.stdout)
|
|
||||||
|
|
||||||
|
|
||||||
def create_dataset():
    """Run ``create_dataset_jtalk.py`` inside ``MMVC_Trainer`` and return the ``sync_exec`` result."""
    logPath = os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt")
    command = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt"
    return sync_exec(command, logPath, "MMVC_Trainer")
|
|
||||||
|
|
||||||
def set_batch_size(batch: int):
    """Rewrite ``batch_size`` in ``MMVC_Trainer/configs/baseconfig.json`` via ``sed``.

    Args:
        batch: New batch size; it is inserted into the sed expression as text.

    Returns:
        The ``(status, detail)`` tuple produced by ``sync_exec``.
    """
    logPath = os.path.join(LOG_DIR, "log_set_batch_size.txt")
    command = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json"
    return sync_exec(command, logPath)
|
|
||||||
|
|
||||||
def set_dummy_device_count():
    """Replace ``torch.cuda.device_count()`` with ``1`` in ``MMVC_Trainer/train_ms.py`` via ``sed``.

    Returns:
        The ``(status, detail)`` tuple produced by ``sync_exec``.
    """
    logPath = os.path.join(LOG_DIR, "log_set_dummy_device_count.txt")
    command = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py'
    return sync_exec(command, logPath)
|
|
||||||
|
|
||||||
### Submodule for Train
|
|
||||||
def exec_training(enable_finetuning: bool, GModel: str, DModel: str):
    """Start ``train_ms.py`` as a background process inside ``MMVC_Trainer``.

    The process handle is stored in the module-level ``train_proc`` so that a
    second start can be refused while training is still running.

    Args:
        enable_finetuning: When equal to ``True``, ``GModel``/``DModel`` are
            passed to the trainer with ``-fg``/``-fd``.
        GModel: File name under ``logs`` passed with ``-fg``.
        DModel: File name under ``logs`` passed with ``-fd``.

    Returns:
        ``(SUCCESS, "success")`` when the process was spawned, otherwise
        ``(ERROR, <detail>)`` (already running, or an exception while spawning).
    """
    import shlex  # local import: only needed to quote the model paths below

    global train_proc
    log_path = os.path.join(LOG_DIR, "training.txt")

    # Check whether training is already running (avoid a double start).
    if train_proc is not None:
        status = train_proc.poll()
        if status is not None:
            print("Training have ended.", status)
            train_proc = None
        else:
            print("Training have stated.")
            return (ERROR, "Training have started")

    try:
        # Use a separate name for the handle so the path is not shadowed.
        with open(log_path, 'w') as log_file:
            if enable_finetuning == True:  # noqa: E712 - keep the original equality semantics
                # cwd is set to MMVC_Trainer when launching, so "logs" is enough here.
                GModelPath = os.path.join("logs", GModel)
                DModelPath = os.path.join("logs", DModel)
                # The command goes through a shell; quote the file names so
                # spaces or shell metacharacters in them cannot alter the command.
                cmd = (
                    'python3 train_ms.py -c configs/train_config.json -m ./ '
                    f'-fg {shlex.quote(GModelPath)} -fd {shlex.quote(DModelPath)}'
                )
            else:
                cmd = 'python3 train_ms.py -c configs/train_config.json -m ./'
            print("exec:", cmd)
            # "exec" replaces the shell with python3, so train_proc.kill() hits the trainer itself.
            train_proc = subprocess.Popen("exec " + cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer")
            print("Training stated")
            print(f"returncode:{train_proc.returncode}")
    except Exception as e:
        print("start training exception:", str(e))
        return (ERROR, str(e))

    return (SUCCESS, "success")
|
|
||||||
|
|
||||||
def stop_training():
    """Kill the training process started by ``exec_training``.

    Reads and updates the module-level ``train_proc`` handle.

    Returns:
        ``(SUCCESS, "success")`` when a running process was killed, otherwise
        ``(ERROR, <detail>)`` when no process was started or it has already
        exited (the detail then includes its return code).
    """
    global train_proc
    if train_proc is None:
        print("Training have not stated.")
        return (ERROR, "Training have not stated.")

    status = train_proc.poll()
    if status is not None:
        print("Training have already ended.", status)
        train_proc = None
        # status is the integer return code; convert it before concatenating,
        # otherwise building the message raises TypeError.
        return (ERROR, "Training have already ended. " + str(status))

    train_proc.kill()
    print("Training have stoped.")
    return (SUCCESS, "success")
|
|
||||||
|
|
||||||
### Main
|
|
||||||
def mod_post_pre_training(batch: int):
    """Run the three preprocessing steps in order, stopping at the first failure.

    Steps: ``set_batch_size``, ``set_dummy_device_count``, ``create_dataset``.

    Args:
        batch: Batch size forwarded to ``set_batch_size``.

    Returns:
        A dict with ``"result"`` (``"success"`` or ``"failed"``) and ``"detail"``.
    """
    batchResult = set_batch_size(batch)
    if batchResult[0] == ERROR:
        return {"result":"failed", "detail": f"Preprocess(set_batch_size) failed. {batchResult[1]}"}

    deviceResult = set_dummy_device_count()
    if deviceResult[0] == ERROR:
        return {"result":"failed", "detail": f"Preprocess(set_dummy_device_count) failed. {deviceResult[1]}"}

    datasetResult = create_dataset()
    if datasetResult[0] == ERROR:
        return {"result":"failed", "detail": f"Preprocess failed(create_dataset). {datasetResult[1]}"}

    return {"result":"success", "detail": f"Preprocess succeeded. {datasetResult[1]}"}
|
|
||||||
|
|
||||||
|
|
||||||
def mod_post_start_training(enable_finetuning: str, GModel: str, DModel: str):
    """Start training through ``exec_training`` and wrap the outcome in a result dict.

    NOTE(review): ``enable_finetuning`` is annotated ``str`` here but
    ``exec_training`` compares it with ``True`` -- confirm what callers pass.

    Returns:
        A dict with ``"result"`` (``"success"`` or ``"failed"``) and ``"detail"``.
    """
    print("START_TRAINING:::::::", enable_finetuning, GModel, DModel)
    outcome = exec_training(enable_finetuning, GModel, DModel)
    if outcome[0] == ERROR:
        return {"result":"failed", "detail": f"Start training failed. {outcome[1]}"}
    return {"result":"success", "detail": f"Start training succeeded. {outcome[1]}"}
|
|
||||||
|
|
||||||
def mod_post_stop_training():
    """Stop training through ``stop_training`` and wrap the outcome in a result dict.

    Returns:
        A dict with ``"result"`` (``"success"`` or ``"failed"``) and ``"detail"``.
    """
    outcome = stop_training()
    if outcome[0] == ERROR:
        return {"result":"failed", "detail": f"Stop training failed. {outcome[1]}"}
    return {"result":"success", "detail": f"Stop training succeeded. {outcome[1]}"}
|
|
||||||
|
|
||||||
### DEBUG
|
|
||||||
def mod_get_related_files():
    """Collect log and configuration files for debugging.

    Covers everything in ``LOG_DIR``, the multi-speaker correspondence file,
    ``train_ms.py`` and everything in ``MMVC_Trainer/configs``.

    Returns:
        A ``JSONResponse`` with one ``{"name", "size", "data"}`` entry per file.
        ``data`` is the file text, or ``""`` for files of 1 MiB or more.
    """
    paths = get_file_list(os.path.join(LOG_DIR, "*"))
    paths.extend([
        "MMVC_Trainer/dataset/multi_speaker_correspondence.txt",
        "MMVC_Trainer/train_ms.py",
    ])
    paths.extend(get_file_list("MMVC_Trainer/configs/*"))

    entries = []
    for path in paths:
        size = os.path.getsize(path)
        content = ""
        # Only inline files below 1 MiB.
        if size < 1024*1024:
            with open(path, "r") as handle:
                content = handle.read()
        entries.append({"name": path, "size": size, "data": content})

    return JSONResponse(content=jsonable_encoder(entries))
|
|
||||||
|
|
||||||
def mod_get_tail_training_log(num: int):
    """Return the last ``num`` lines of the training log.

    The log is first copied to ``/tmp/out`` through ``sed``, which drops
    everything up to the last carriage return on each line; ``tail`` then
    reads that copy.

    Args:
        num: Number of trailing lines to return.

    Returns:
        A dict with ``"result"`` and ``"detail"`` (the log text on success).
    """
    logPath = os.path.join(LOG_DIR, "training.txt")
    # The outcome of this cleaning step is not checked.
    sync_exec(f"cat {logPath} | sed -e 's/.*\r//' > /tmp/out","/dev/null")

    tailResult = sync_exec_with_stdout(f'tail -n {num} /tmp/out', "/dev/null")
    if tailResult[0] == ERROR:
        return {"result":"failed", "detail": f"Tail training log failed. {tailResult[1]}"}
    return {"result":"success", "detail":tailResult[1]}
|
|
@ -1,126 +0,0 @@
|
|||||||
from const import EnumEmbedderTypes, EnumInferenceTypes
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import onnxruntime
|
|
||||||
import json
|
|
||||||
|
|
||||||
from data.ModelSlot import ModelSlot
|
|
||||||
|
|
||||||
|
|
||||||
def _setInfoByPytorch(slot: ModelSlot):
    """Fill in ``slot`` from the PyTorch checkpoint at ``slot.modelFile``.

    A checkpoint whose ``config`` has 18 entries is handled as an original RVC
    model (v1 or v2 by its ``version`` key); any other length is handled as a
    DDPN RVC model, whose embedder settings are read from the checkpoint.

    Args:
        slot: Model slot to update in place; only ``modelFile`` is read.
    """
    checkpoint = torch.load(slot.modelFile, map_location="cpu")
    isOriginalRVC = len(checkpoint["config"]) == 18

    # Both model families store the pitch flag the same way.
    slot.f0 = True if checkpoint["f0"] == 1 else False

    if isOriginalRVC:
        # Original RVC
        version = checkpoint.get("version", "v1")
        if version is None or version == "v1":
            slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
            slot.embChannels = 256
            slot.embOutputLayer = 9
            slot.useFinalProj = True
            slot.embedder = EnumEmbedderTypes.hubert.value
            print("[Voice Changer] Official Model(pyTorch) : v1")
        else:
            slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
            slot.embChannels = 768
            slot.embOutputLayer = 12
            slot.useFinalProj = False
            slot.embedder = EnumEmbedderTypes.hubert.value
            print("[Voice Changer] Official Model(pyTorch) : v2")
    else:
        # DDPN RVC
        slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
        slot.embChannels = checkpoint["config"][17]
        slot.embOutputLayer = checkpoint["embedder_output_layer"] if "embedder_output_layer" in checkpoint else 9
        slot.useFinalProj = True if slot.embChannels == 256 else False

        # Report which official layout the DDPN model resembles.
        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
            print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
            print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
        else:
            print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")

        # Strip a trailing "768" from the embedder name.
        embedderName = checkpoint["embedder_name"]
        slot.embedder = embedderName
        if embedderName.endswith("768"):
            slot.embedder = embedderName[:-3]

    slot.samplingRate = checkpoint["config"][-1]

    del checkpoint
|
|
||||||
|
|
||||||
|
|
||||||
def _setInfoByONNX(slot: ModelSlot):
    """Fill in ``slot`` from the metadata embedded in the ONNX file at ``slot.modelFile``.

    The ``"metadata"`` entry of the model's custom metadata map is parsed as
    JSON. If anything in that step fails, the slot gets fixed fallback values
    and is flagged ``deprecated``.

    Args:
        slot: Model slot to update in place; only ``modelFile`` is read.
    """
    session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
    modelMeta = session.get_modelmeta()
    try:
        meta = json.loads(modelMeta.custom_metadata_map["metadata"])

        slot.embChannels = meta["embChannels"]
        slot.embOutputLayer = meta["embOutputLayer"] if "embOutputLayer" in meta else 9
        slot.useFinalProj = meta["useFinalProj"] if "useFinalProj" in meta else True if slot.embChannels == 256 else False

        # NOTE(review): this overrides whatever was just read from the
        # metadata "useFinalProj" key -- confirm that is intended.
        slot.useFinalProj = True if slot.embChannels == 256 else False

        # Report which official layout the ONNX model resembles.
        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
            print("[Voice Changer] ONNX Model: Official v1 like")
        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
            print("[Voice Changer] ONNX Model: Official v2 like")
        else:
            print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")

        if "embedder" in meta:
            slot.embedder = meta["embedder"]
        else:
            slot.embedder = EnumEmbedderTypes.hubert.value

        slot.f0 = meta["f0"]
        slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
        slot.samplingRate = meta["samplingRate"]
        slot.deprecated = False

    except Exception as e:
        # Metadata missing or unreadable: fall back to fixed values and flag the file.
        slot.modelType = EnumInferenceTypes.onnxRVC.value
        slot.embChannels = 256
        slot.embedder = EnumEmbedderTypes.hubert.value
        slot.f0 = True
        slot.samplingRate = 48000
        slot.deprecated = True

        print("[Voice Changer] setInfoByONNX", e)
        print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
        print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
        print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")

    del session
|
|
@ -20,14 +20,10 @@ if sys.platform.startswith("darwin"):
|
|||||||
else:
|
else:
|
||||||
sys.path.append("RVC")
|
sys.path.append("RVC")
|
||||||
|
|
||||||
from voice_changer.RVC.ModelSlotGenerator import (
|
|
||||||
_setInfoByONNX,
|
|
||||||
_setInfoByPytorch,
|
|
||||||
)
|
|
||||||
from voice_changer.RVC.RVCSettings import RVCSettings
|
from voice_changer.RVC.RVCSettings import RVCSettings
|
||||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams2
|
from voice_changer.utils.VoiceChangerModel import AudioInOut, VoiceChangerModel
|
||||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
|
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
|
||||||
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
|
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
|
||||||
@ -35,10 +31,10 @@ from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline
|
|||||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||||
from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
||||||
|
|
||||||
from Exceptions import DeviceCannotSupportHalfPrecisionException, NoModeLoadedException
|
from Exceptions import DeviceCannotSupportHalfPrecisionException
|
||||||
|
|
||||||
|
|
||||||
class RVC:
|
class RVC(VoiceChangerModel):
|
||||||
initialLoad: bool = True
|
initialLoad: bool = True
|
||||||
settings: RVCSettings = RVCSettings()
|
settings: RVCSettings = RVCSettings()
|
||||||
|
|
||||||
@ -53,7 +49,7 @@ class RVC:
|
|||||||
needSwitch: bool = False
|
needSwitch: bool = False
|
||||||
|
|
||||||
def __init__(self, params: VoiceChangerParams, slotInfo: RVCModelSlot):
|
def __init__(self, params: VoiceChangerParams, slotInfo: RVCModelSlot):
|
||||||
print("[Voice Changer][RVC] Creating instance ")
|
print("[Voice Changer] [RVC] Creating instance ")
|
||||||
EmbedderManager.initialize(params)
|
EmbedderManager.initialize(params)
|
||||||
|
|
||||||
self.params = params
|
self.params = params
|
||||||
@ -64,38 +60,16 @@ class RVC:
|
|||||||
self.initialize()
|
self.initialize()
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
print("[Voice Changer][RVC] Initializing... ")
|
print("[Voice Changer] [RVC] Initializing... ")
|
||||||
|
|
||||||
# pipelineの生成
|
# pipelineの生成
|
||||||
self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
|
self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
|
||||||
|
|
||||||
# その他の設定
|
# その他の設定
|
||||||
self.trans = self.slotInfo.defaultTune
|
self.settings.tran = self.slotInfo.defaultTune
|
||||||
self.index_ratio = self.slotInfo.defaultIndexRatio
|
self.settings.indexRatio = self.slotInfo.defaultIndexRatio
|
||||||
self.protect = self.slotInfo.defaultProtect
|
self.settings.protect = self.slotInfo.defaultProtect
|
||||||
self.samplingRate = self.slotInfo.samplingRate
|
print("[Voice Changer] [RVC] Initializing... done")
|
||||||
print("[Voice Changer][RVC] Initializing... done")
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def loadModel2(cls, props: LoadModelParams2):
|
|
||||||
slotInfo: RVCModelSlot = RVCModelSlot()
|
|
||||||
for file in props.files:
|
|
||||||
if file.kind == "rvcModel":
|
|
||||||
slotInfo.modelFile = file.name
|
|
||||||
elif file.kind == "rvcIndex":
|
|
||||||
slotInfo.indexFile = file.name
|
|
||||||
slotInfo.defaultTune = 0
|
|
||||||
slotInfo.defaultIndexRatio = 0
|
|
||||||
slotInfo.defaultProtect = 0.5
|
|
||||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
|
||||||
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
|
|
||||||
# slotInfo.iconFile = "/assets/icons/noimage.png"
|
|
||||||
|
|
||||||
if slotInfo.isONNX:
|
|
||||||
_setInfoByONNX(slotInfo)
|
|
||||||
else:
|
|
||||||
_setInfoByPytorch(slotInfo)
|
|
||||||
return slotInfo
|
|
||||||
|
|
||||||
def update_settings(self, key: str, val: int | float | str):
|
def update_settings(self, key: str, val: int | float | str):
|
||||||
print("[Voice Changer][RVC]: update_settings", key, val)
|
print("[Voice Changer][RVC]: update_settings", key, val)
|
||||||
@ -123,7 +97,7 @@ class RVC:
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def get_processing_sampling_rate(self):
|
def get_processing_sampling_rate(self):
|
||||||
return self.settings.modelSamplingRate
|
return self.slotInfo.samplingRate
|
||||||
|
|
||||||
def generate_input(
|
def generate_input(
|
||||||
self,
|
self,
|
||||||
@ -170,14 +144,6 @@ class RVC:
|
|||||||
return (audio_buffer, convertSize, vol)
|
return (audio_buffer, convertSize, vol)
|
||||||
|
|
||||||
def inference(self, data):
|
def inference(self, data):
|
||||||
# if self.settings.modelSlotIndex < 0:
|
|
||||||
# print(
|
|
||||||
# "[Voice Changer] wait for loading model...",
|
|
||||||
# self.settings.modelSlotIndex,
|
|
||||||
# self.currentSlot,
|
|
||||||
# )
|
|
||||||
# raise NoModeLoadedException("model_common")
|
|
||||||
|
|
||||||
audio = data[0]
|
audio = data[0]
|
||||||
convertSize = data[1]
|
convertSize = data[1]
|
||||||
vol = data[2]
|
vol = data[2]
|
||||||
@ -185,17 +151,13 @@ class RVC:
|
|||||||
if vol < self.settings.silentThreshold:
|
if vol < self.settings.silentThreshold:
|
||||||
return np.zeros(convertSize).astype(np.int16)
|
return np.zeros(convertSize).astype(np.int16)
|
||||||
|
|
||||||
audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99)
|
audio = torchaudio.functional.resample(audio, self.slotInfo.samplingRate, 16000, rolloff=0.99)
|
||||||
repeat = 1 if self.settings.rvcQuality else 0
|
repeat = 1 if self.settings.rvcQuality else 0
|
||||||
sid = 0
|
sid = 0
|
||||||
f0_up_key = self.settings.tran
|
f0_up_key = self.settings.tran
|
||||||
index_rate = self.settings.indexRatio
|
index_rate = self.settings.indexRatio
|
||||||
protect = self.settings.protect
|
protect = self.settings.protect
|
||||||
|
|
||||||
# if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
|
||||||
# embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer
|
|
||||||
# useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
|
|
||||||
|
|
||||||
if_f0 = 1 if self.slotInfo.f0 else 0
|
if_f0 = 1 if self.slotInfo.f0 else 0
|
||||||
embOutputLayer = self.slotInfo.embOutputLayer
|
embOutputLayer = self.slotInfo.embOutputLayer
|
||||||
useFinalProj = self.slotInfo.useFinalProj
|
useFinalProj = self.slotInfo.useFinalProj
|
||||||
@ -207,7 +169,7 @@ class RVC:
|
|||||||
f0_up_key,
|
f0_up_key,
|
||||||
index_rate,
|
index_rate,
|
||||||
if_f0,
|
if_f0,
|
||||||
self.settings.extraConvertSize / self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
|
self.settings.extraConvertSize / self.slotInfo.samplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
|
||||||
embOutputLayer,
|
embOutputLayer,
|
||||||
useFinalProj,
|
useFinalProj,
|
||||||
repeat,
|
repeat,
|
||||||
@ -292,36 +254,6 @@ class RVC:
|
|||||||
# self.settings.modelSlotIndex = targetSlot
|
# self.settings.modelSlotIndex = targetSlot
|
||||||
# self.currentSlot = self.settings.modelSlotIndex
|
# self.currentSlot = self.settings.modelSlotIndex
|
||||||
|
|
||||||
# def update_model_default(self):
|
|
||||||
# # {"slot":9,"key":"name","val":"dogsdododg"}
|
|
||||||
# self.modelSlotManager.update_model_info(
|
|
||||||
# json.dumps(
|
|
||||||
# {
|
|
||||||
# "slot": self.currentSlot,
|
|
||||||
# "key": "defaultTune",
|
|
||||||
# "val": self.settings.tran,
|
|
||||||
# }
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
# self.modelSlotManager.update_model_info(
|
|
||||||
# json.dumps(
|
|
||||||
# {
|
|
||||||
# "slot": self.currentSlot,
|
|
||||||
# "key": "defaultIndexRatio",
|
|
||||||
# "val": self.settings.indexRatio,
|
|
||||||
# }
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
# self.modelSlotManager.update_model_info(
|
|
||||||
# json.dumps(
|
|
||||||
# {
|
|
||||||
# "slot": self.currentSlot,
|
|
||||||
# "key": "defaultProtect",
|
|
||||||
# "val": self.settings.protect,
|
|
||||||
# }
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
|
|
||||||
def get_model_current(self):
|
def get_model_current(self):
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
@ -337,9 +269,3 @@ class RVC:
|
|||||||
"val": self.settings.protect,
|
"val": self.settings.protect,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
# def update_model_info(self, newData: str):
|
|
||||||
# self.modelSlotManager.update_model_info(newData)
|
|
||||||
|
|
||||||
# def upload_model_assets(self, params: str):
|
|
||||||
# self.modelSlotManager.store_model_assets(params)
|
|
||||||
|
152
server/voice_changer/RVC/RVCModelSlotGenerator.py
Normal file
152
server/voice_changer/RVC/RVCModelSlotGenerator.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
import os
|
||||||
|
from const import EnumEmbedderTypes, EnumInferenceTypes
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import onnxruntime
|
||||||
|
import json
|
||||||
|
|
||||||
|
from data.ModelSlot import ModelSlot, RVCModelSlot
|
||||||
|
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||||
|
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
|
||||||
|
|
||||||
|
|
||||||
|
class RVCModelSlotGenerator(ModelSlotGenerator):
    """Build the slot metadata (``RVCModelSlot``) for an uploaded RVC model.

    The model file itself is opened (PyTorch checkpoint or ONNX session) only
    to read its configuration; no inference is performed here.
    """

    @classmethod
    def loadModel(cls, props: LoadModelParams):
        """Create and return the ``RVCModelSlot`` described by ``props``.

        Files of kind ``"rvcModel"`` and ``"rvcIndex"`` provide the model and
        index paths. The default tune / index ratio / protect values are set
        here, and the remaining fields are filled in by inspecting the model
        file: it is treated as ONNX when its name ends with ``.onnx`` and as a
        PyTorch checkpoint otherwise.
        """
        slotInfo: RVCModelSlot = RVCModelSlot()
        for file in props.files:
            if file.kind == "rvcModel":
                slotInfo.modelFile = file.name
            elif file.kind == "rvcIndex":
                slotInfo.indexFile = file.name
        slotInfo.defaultTune = 0
        slotInfo.defaultIndexRatio = 0
        slotInfo.defaultProtect = 0.5
        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
        # The slot name is the model file name without directory and extension.
        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
        # slotInfo.iconFile = "/assets/icons/noimage.png"

        if slotInfo.isONNX:
            cls._setInfoByONNX(slotInfo)
        else:
            cls._setInfoByPytorch(slotInfo)
        return slotInfo

    @classmethod
    def _setInfoByPytorch(cls, slot: ModelSlot):
        """Fill ``slot`` from the contents of the PyTorch checkpoint ``slot.modelFile``.

        A checkpoint whose ``config`` has 18 entries is handled as an original
        RVC model (v1 or v2, chosen by its ``version`` entry); any other length
        is handled as a DDPN (WebUI) model, which carries its own embedder
        settings.
        """
        # NOTE(review): torch.load unpickles the file, and slot.modelFile comes
        # from an upload. A crafted checkpoint can run arbitrary code here;
        # consider restricting loading (e.g. weights_only) after confirming
        # that real checkpoints still load.
        cpt = torch.load(slot.modelFile, map_location="cpu")
        config_len = len(cpt["config"])

        # Both checkpoint flavours store the pitch (f0) flag the same way.
        slot.f0 = bool(cpt["f0"] == 1)

        if config_len == 18:
            # Original RVC
            version = cpt.get("version", "v1")
            if version is None or version == "v1":
                slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
                slot.embChannels = 256
                slot.embOutputLayer = 9
                slot.useFinalProj = True
                slot.embedder = EnumEmbedderTypes.hubert.value
                print("[Voice Changer] Official Model(pyTorch) : v1")
            else:
                slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
                slot.embChannels = 768
                slot.embOutputLayer = 12
                slot.useFinalProj = False
                slot.embedder = EnumEmbedderTypes.hubert.value
                print("[Voice Changer] Official Model(pyTorch) : v2")

        else:
            # DDPN RVC
            slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
            slot.embChannels = cpt["config"][17]
            slot.embOutputLayer = cpt.get("embedder_output_layer", 9)
            # The final projection is used only with 256-channel embeddings.
            slot.useFinalProj = bool(slot.embChannels == 256)

            # Report which kind of DDPN model was detected.
            if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
                print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
            elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
                print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
            else:
                print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")

            # The embedder name is stored as-is (not validated against
            # EnumEmbedderTypes), minus a trailing "768" marker if present.
            slot.embedder = cpt["embedder_name"]
            if slot.embedder.endswith("768"):
                slot.embedder = slot.embedder[:-3]

        # The last config entry is the model's sampling rate.
        slot.samplingRate = cpt["config"][-1]

        del cpt

    @classmethod
    def _setInfoByONNX(cls, slot: ModelSlot):
        """Fill ``slot`` from the custom metadata embedded in the ONNX file.

        The metadata is read from the ``"metadata"`` entry of the model's
        custom metadata map (a JSON string). If it is missing or incomplete,
        the slot is marked ``deprecated`` and populated with a fixed fallback
        configuration (256 channels, layer 9, final projection, hubert, f0,
        48000 Hz), and a warning is printed.
        """
        tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
        modelmeta = tmp_onnx_session.get_modelmeta()
        try:
            metadata = json.loads(modelmeta.custom_metadata_map["metadata"])

            # slot.modelType = metadata["modelType"]
            slot.embChannels = metadata["embChannels"]
            slot.embOutputLayer = metadata.get("embOutputLayer", 9)
            # useFinalProj is derived from the channel count. A "useFinalProj"
            # metadata entry is not consulted: the previous code read it but
            # then unconditionally overwrote it with this same rule.
            slot.useFinalProj = bool(slot.embChannels == 256)

            # Report which kind of ONNX model was detected.
            if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
                print("[Voice Changer] ONNX Model: Official v1 like")
            elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
                print("[Voice Changer] ONNX Model: Official v2 like")
            else:
                print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")

            # The embedder name is stored as-is (not validated against
            # EnumEmbedderTypes); hubert is assumed when none is recorded.
            slot.embedder = metadata.get("embedder", EnumEmbedderTypes.hubert.value)

            slot.f0 = metadata["f0"]
            slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
            slot.samplingRate = metadata["samplingRate"]
            slot.deprecated = False

        except Exception as e:
            # Best-effort fallback for ONNX files without usable metadata.
            slot.modelType = EnumInferenceTypes.onnxRVC.value
            slot.embChannels = 256
            # The try body may already have written these two fields before it
            # failed (e.g. a 768-channel model lacking "f0"); reset them so the
            # fallback is a self-consistent 256-channel configuration.
            slot.embOutputLayer = 9
            slot.useFinalProj = True
            slot.embedder = EnumEmbedderTypes.hubert.value
            slot.f0 = True
            slot.samplingRate = 48000
            slot.deprecated = True

            print("[Voice Changer] setInfoByONNX", e)
            print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
            print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
            print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")

        del tmp_onnx_session
|
@ -7,12 +7,9 @@ class RVCSettings:
|
|||||||
dstId: int = 0
|
dstId: int = 0
|
||||||
|
|
||||||
f0Detector: str = "harvest" # dio or harvest
|
f0Detector: str = "harvest" # dio or harvest
|
||||||
tran: int = 20
|
tran: int = 12
|
||||||
silentThreshold: float = 0.00001
|
silentThreshold: float = 0.00001
|
||||||
extraConvertSize: int = 1024 * 32
|
extraConvertSize: int = 1024 * 4
|
||||||
clusterInferRatio: float = 0.1
|
|
||||||
|
|
||||||
framework: str = "PyTorch" # PyTorch or ONNX
|
|
||||||
|
|
||||||
indexRatio: float = 0
|
indexRatio: float = 0
|
||||||
protect: float = 0.5
|
protect: float = 0.5
|
||||||
@ -21,8 +18,8 @@ class RVCSettings:
|
|||||||
modelSamplingRate: int = 48000
|
modelSamplingRate: int = 48000
|
||||||
|
|
||||||
speakers: dict[str, int] = field(default_factory=lambda: {})
|
speakers: dict[str, int] = field(default_factory=lambda: {})
|
||||||
isHalf: int = 1 # 0:off, 1:on
|
# isHalf: int = 1 # 0:off, 1:on
|
||||||
enableDirectML: int = 0 # 0:off, 1:on
|
# enableDirectML: int = 0 # 0:off, 1:on
|
||||||
# ↓mutableな物だけ列挙
|
# ↓mutableな物だけ列挙
|
||||||
intData = [
|
intData = [
|
||||||
"gpu",
|
"gpu",
|
||||||
@ -30,11 +27,7 @@ class RVCSettings:
|
|||||||
"tran",
|
"tran",
|
||||||
"extraConvertSize",
|
"extraConvertSize",
|
||||||
"rvcQuality",
|
"rvcQuality",
|
||||||
"modelSamplingRate",
|
|
||||||
"silenceFront",
|
"silenceFront",
|
||||||
"modelSlotIndex",
|
|
||||||
"isHalf",
|
|
||||||
"enableDirectML",
|
|
||||||
]
|
]
|
||||||
floatData = ["silentThreshold", "indexRatio", "protect"]
|
floatData = ["silentThreshold", "indexRatio", "protect"]
|
||||||
strData = ["framework", "f0Detector"]
|
strData = ["f0Detector"]
|
||||||
|
@ -7,7 +7,7 @@ from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
|
|||||||
from voice_changer.ModelSlotManager import ModelSlotManager
|
from voice_changer.ModelSlotManager import ModelSlotManager
|
||||||
from voice_changer.VoiceChanger import VoiceChanger
|
from voice_changer.VoiceChanger import VoiceChanger
|
||||||
from const import UPLOAD_DIR, ModelType
|
from const import UPLOAD_DIR, ModelType
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams, LoadModelParams2
|
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
from dataclasses import dataclass, asdict, field
|
from dataclasses import dataclass, asdict, field
|
||||||
@ -85,20 +85,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
|
|||||||
cls._instance.voiceChanger = VoiceChanger(params)
|
cls._instance.voiceChanger = VoiceChanger(params)
|
||||||
return cls._instance
|
return cls._instance
|
||||||
|
|
||||||
def loadModel(self, props: LoadModelParams):
|
def loadModel(self, params: LoadModelParams):
|
||||||
paramDict = props.params
|
if params.isSampleMode:
|
||||||
if paramDict["sampleId"] is not None:
|
|
||||||
# サンプルダウンロード
|
# サンプルダウンロード
|
||||||
downloadSample(self.params.sample_mode, paramDict["sampleId"], self.params.model_dir, props.slot, {"useIndex": paramDict["rvcIndexDownload"]})
|
downloadSample(self.params.sample_mode, params.sampleId, self.params.model_dir, params.slot, {"useIndex": params.params["rvcIndexDownload"]})
|
||||||
self.modelSlotManager.getAllSlotInfo(reload=True)
|
self.modelSlotManager.getAllSlotInfo(reload=True)
|
||||||
info = {"status": "OK"}
|
info = {"status": "OK"}
|
||||||
return info
|
return info
|
||||||
elif paramDict["voiceChangerType"]:
|
else:
|
||||||
# 新しいアップローダ
|
# アップローダ
|
||||||
# Dataを展開
|
|
||||||
params = LoadModelParams2(**paramDict)
|
|
||||||
params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
|
|
||||||
|
|
||||||
# ファイルをslotにコピー
|
# ファイルをslotにコピー
|
||||||
for file in params.files:
|
for file in params.files:
|
||||||
print("FILE", file)
|
print("FILE", file)
|
||||||
@ -116,42 +111,32 @@ class VoiceChangerManager(ServerDeviceCallbacks):
|
|||||||
|
|
||||||
# メタデータ作成(各VCで定義)
|
# メタデータ作成(各VCで定義)
|
||||||
if params.voiceChangerType == "RVC":
|
if params.voiceChangerType == "RVC":
|
||||||
from voice_changer.RVC.RVC import RVC # 起動時にインポートするとパラメータが取れない。
|
from voice_changer.RVC.RVCModelSlotGenerator import RVCModelSlotGenerator # 起動時にインポートするとパラメータが取れない。
|
||||||
|
|
||||||
slotInfo = RVC.loadModel2(params)
|
slotInfo = RVCModelSlotGenerator.loadModel(params)
|
||||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||||
elif params.voiceChangerType == "MMVCv13":
|
elif params.voiceChangerType == "MMVCv13":
|
||||||
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
||||||
|
|
||||||
slotInfo = MMVCv13.loadModel2(params)
|
slotInfo = MMVCv13.loadModel(params)
|
||||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||||
elif params.voiceChangerType == "MMVCv15":
|
elif params.voiceChangerType == "MMVCv15":
|
||||||
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
|
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
|
||||||
|
|
||||||
slotInfo = MMVCv15.loadModel2(params)
|
slotInfo = MMVCv15.loadModel(params)
|
||||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||||
elif params.voiceChangerType == "so-vits-svc-40":
|
elif params.voiceChangerType == "so-vits-svc-40":
|
||||||
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
|
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
|
||||||
|
|
||||||
slotInfo = SoVitsSvc40.loadModel2(params)
|
slotInfo = SoVitsSvc40.loadModel(params)
|
||||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||||
elif params.voiceChangerType == "DDSP-SVC":
|
elif params.voiceChangerType == "DDSP-SVC":
|
||||||
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
|
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
|
||||||
|
|
||||||
slotInfo = DDSP_SVC.loadModel2(params)
|
slotInfo = DDSP_SVC.loadModel(params)
|
||||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||||
print("params", params)
|
print("params", params)
|
||||||
|
|
||||||
else:
|
|
||||||
# 古いアップローダ
|
|
||||||
print("[Voice Canger]: upload models........")
|
|
||||||
info = self.voiceChanger.loadModel(props)
|
|
||||||
if hasattr(info, "status") and info["status"] == "NG":
|
|
||||||
return info
|
|
||||||
else:
|
|
||||||
info["status"] = "OK"
|
|
||||||
return info
|
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
data = asdict(self.settings)
|
data = asdict(self.settings)
|
||||||
data["gpus"] = self.gpus
|
data["gpus"] = self.gpus
|
||||||
|
@ -1,27 +1,8 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from const import VoiceChangerType
|
from const import VoiceChangerType
|
||||||
from typing import Literal, TypeAlias
|
from typing import Literal, TypeAlias
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class FilePaths:
|
|
||||||
configFilename: str | None
|
|
||||||
pyTorchModelFilename: str | None
|
|
||||||
onnxModelFilename: str | None
|
|
||||||
clusterTorchModelFilename: str | None
|
|
||||||
featureFilename: str | None
|
|
||||||
indexFilename: str | None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class LoadModelParams:
|
|
||||||
slot: int
|
|
||||||
isHalf: bool
|
|
||||||
params: Any
|
|
||||||
|
|
||||||
|
|
||||||
LoadModelParamFileKind: TypeAlias = Literal[
|
LoadModelParamFileKind: TypeAlias = Literal[
|
||||||
"mmvcv13Config",
|
"mmvcv13Config",
|
||||||
"mmvcv13Model",
|
"mmvcv13Model",
|
||||||
@ -47,9 +28,10 @@ class LoadModelParamFile:
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class LoadModelParams2:
|
class LoadModelParams:
|
||||||
voiceChangerType: VoiceChangerType
|
voiceChangerType: VoiceChangerType
|
||||||
slot: int
|
slot: int
|
||||||
isSampleMode: bool
|
isSampleMode: bool
|
||||||
sampleId: str
|
sampleId: str
|
||||||
files: list[LoadModelParamFile]
|
files: list[LoadModelParamFile]
|
||||||
|
params: dict
|
||||||
|
9
server/voice_changer/utils/ModelSlotGenerator.py
Normal file
9
server/voice_changer/utils/ModelSlotGenerator.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||||
|
|
||||||
|
|
||||||
|
class ModelSlotGenerator(Protocol):
    """Structural interface for classes that build model-slot metadata from upload parameters."""

    @classmethod
    def loadModel(cls, params: LoadModelParams):
        """Build and return the slot information for the model described by ``params``."""
        ...
|
@ -21,10 +21,8 @@ class VoiceChangerModel(Protocol):
|
|||||||
def inference(self, data: tuple[Any, ...]) -> Any:
|
def inference(self, data: tuple[Any, ...]) -> Any:
|
||||||
...
|
...
|
||||||
|
|
||||||
def generate_input(
|
def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int, solaSearchFrame: int) -> tuple[Any, ...]:
|
||||||
self, newData: AudioInOut, inputSize: int, crossfadeSize: int
|
|
||||||
) -> tuple[Any, ...]:
|
|
||||||
...
|
...
|
||||||
|
|
||||||
def update_settings(self, key: str, val: Any) -> bool:
|
def update_settings(self, key: str, val: int | float | str) -> bool:
|
||||||
...
|
...
|
||||||
|
Loading…
Reference in New Issue
Block a user