add feature and index to RVC

This commit is contained in:
wataru 2023-04-08 03:11:37 +09:00
parent f68366c22a
commit 04b9bb1ac3
18 changed files with 387 additions and 76 deletions

View File

@ -34,12 +34,15 @@
{
"name": "modelUploader",
"options": {
"showConfig": true,
"showConfig": false,
"showOnnx": true,
"showPyTorch": true,
"showCorrespondence": false,
"showPyTorchCluster": false,
"showFeature": true,
"showIndex": true,
"defaultEnablePyTorch": true
}
},
@ -83,12 +86,6 @@
}
],
"speakerSetting": [
{
"name": "srcId",
"options": {
"showF0": true
}
},
{
"name": "dstId",
"options": {
@ -96,20 +93,12 @@
"useServerInfo": false
}
},
{
"name": "editSpeakerIdMapping",
"options": {}
},
{
"name": "f0Factor",
"options": {}
},
{
"name": "tune",
"options": {}
},
{
"name": "clusterInferRatio",
"name": "indexRatio",
"options": {}
},
{

File diff suppressed because one or more lines are too long

View File

@ -34,12 +34,15 @@
{
"name": "modelUploader",
"options": {
"showConfig": true,
"showConfig": false,
"showOnnx": true,
"showPyTorch": true,
"showCorrespondence": false,
"showPyTorchCluster": false,
"showFeature": true,
"showIndex": true,
"defaultEnablePyTorch": true
}
},
@ -83,12 +86,6 @@
}
],
"speakerSetting": [
{
"name": "srcId",
"options": {
"showF0": true
}
},
{
"name": "dstId",
"options": {
@ -96,20 +93,12 @@
"useServerInfo": false
}
},
{
"name": "editSpeakerIdMapping",
"options": {}
},
{
"name": "f0Factor",
"options": {}
},
{
"name": "tune",
"options": {}
},
{
"name": "clusterInferRatio",
"name": "indexRatio",
"options": {}
},
{

View File

@ -33,6 +33,7 @@ import { CrossFadeOffsetRateRow, CrossFadeOffsetRateRowProps } from "./component
import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_CrossFadeEndRateRow"
import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
export const catalog: { [key: string]: (props: any) => JSX.Element } = {}
@ -78,6 +79,9 @@ const initialize = () => {
addToCatalog("clusterInferRatio", (props: ClusterInferRatioRowProps) => { return <ClusterInferRatioRow {...props} /> })
addToCatalog("noiseScale", (props: NoiseScaleRowProps) => { return <NoiseScaleRow {...props} /> })
addToCatalog("silentThreshold", (props: SilentThresholdRowProps) => { return <SilentThresholdRow {...props} /> })
addToCatalog("indexRatio", (props: IndexRatioRowProps) => { return <IndexRatioRow {...props} /> })
addToCatalog("inputChunkNum", (props: InputChunkNumRowProps) => { return <InputChunkNumRow {...props} /> })
addToCatalog("extraDataLength", (props: ExtraDataLengthRowProps) => { return <ExtraDataLengthRow {...props} /> })

View File

@ -0,0 +1,47 @@
import React, { useMemo } from "react"
import { fileSelector } from "@dannadori/voice-changer-client-js"
import { useAppState } from "../../../001_provider/001_AppStateProvider"
export const FeatureSelectRow = () => {
const appState = useAppState()
const featureSelectRow = useMemo(() => {
const featureFilenameText = appState.serverSetting.fileUploadSetting.feature?.filename || appState.serverSetting.fileUploadSetting.feature?.file?.name || ""
const onFeatureFileLoadClicked = async () => {
const file = await fileSelector("")
if (file.name.endsWith(".npy") == false) {
alert("Feature file's extension should be npy")
return
}
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
feature: {
file: file
}
})
}
const onFeatureFileClearClicked = () => {
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
feature: null
})
}
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">feature(.npy)</div>
<div className="body-item-text">
<div>{featureFilenameText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onFeatureFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onFeatureFileClearClicked}>clear</div>
</div>
</div>
)
}, [appState.serverSetting.fileUploadSetting, appState.serverSetting.setFileUploadSetting])
return featureSelectRow
}

View File

@ -0,0 +1,47 @@
import React, { useMemo } from "react"
import { fileSelector } from "@dannadori/voice-changer-client-js"
import { useAppState } from "../../../001_provider/001_AppStateProvider"

// Row for choosing the RVC index file (.index).
// Shows the currently selected filename and provides select/clear buttons
// that write the choice into the shared fileUploadSetting.
export const IndexSelectRow = () => {
    const appState = useAppState()

    const indexSelectRow = useMemo(() => {
        // Prefer the stored filename (e.g. restored from cache); fall back to the
        // name of a freshly picked File object.
        const indexFilenameText = appState.serverSetting.fileUploadSetting.index?.filename || appState.serverSetting.fileUploadSetting.index?.file?.name || ""

        const onIndexFileLoadClicked = async () => {
            const file = await fileSelector("")
            // Only .index files are accepted by the server.
            if (!file.name.endsWith(".index")) {
                alert("Index file's extension should be .index")
                return
            }
            appState.serverSetting.setFileUploadSetting({
                ...appState.serverSetting.fileUploadSetting,
                index: {
                    file: file
                }
            })
        }

        const onIndexFileClearClicked = () => {
            appState.serverSetting.setFileUploadSetting({
                ...appState.serverSetting.fileUploadSetting,
                index: null
            })
        }

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-2">index(.index)</div>
                <div className="body-item-text">
                    <div>{indexFilenameText}</div>
                </div>
                <div className="body-button-container">
                    <div className="body-button" onClick={onIndexFileLoadClicked}>select</div>
                    <div className="body-button left-margin-1" onClick={onIndexFileClearClicked}>clear</div>
                </div>
            </div>
        )
    }, [appState.serverSetting.fileUploadSetting, appState.serverSetting.setFileUploadSetting])

    return indexSelectRow
}

View File

@ -13,7 +13,7 @@ export const ModelUploadButtonRow = () => {
const uploadButtonAction = appState.serverSetting.isUploading ? () => { } : onModelUploadClicked
const uploadButtonLabel = appState.serverSetting.isUploading ? "wait..." : "upload"
const uploadingStatus = appState.serverSetting.isUploading ?
appState.serverSetting.uploadProgress == 0 ? `loading model...(wait about 20sec)` : `uploading.... ${appState.serverSetting.uploadProgress}%` : ""
appState.serverSetting.uploadProgress == 0 ? `loading model...(wait about 20sec)` : `uploading.... ${appState.serverSetting.uploadProgress.toFixed(1)}%` : ""
return (

View File

@ -5,6 +5,8 @@ import { ONNXSelectRow } from "./301-2_ONNXSelectRow"
import { PyTorchSelectRow } from "./301-3_PyTorchSelectRow"
import { CorrespondenceSelectRow } from "./301-4_CorrespondenceSelectRow"
import { PyTorchClusterSelectRow } from "./301-5_PyTorchClusterSelectRow"
import { FeatureSelectRow } from "./301-6_FeatureSelectRow"
import { IndexSelectRow } from "./301-7_IndexSelectRow"
import { ModelUploadButtonRow } from "./301-9_ModelUploadButtonRow"
export type ModelUploaderRowProps = {
@ -14,6 +16,9 @@ export type ModelUploaderRowProps = {
showCorrespondence: boolean
showPyTorchCluster: boolean
showFeature: boolean
showIndex: boolean
defaultEnablePyTorch: boolean
}
@ -44,6 +49,8 @@ export const ModelUploaderRow = (props: ModelUploaderRowProps) => {
{props.showPyTorch && guiState.showPyTorchModelUpload ? <PyTorchSelectRow /> : <></>}
{props.showCorrespondence ? <CorrespondenceSelectRow /> : <></>}
{props.showPyTorchCluster ? <PyTorchClusterSelectRow /> : <></>}
{props.showFeature ? <FeatureSelectRow /> : <></>}
{props.showIndex ? <IndexSelectRow /> : <></>}
<ModelUploadButtonRow />
</>
)

View File

@ -0,0 +1,30 @@
import React, { useMemo } from "react"
import { useAppState } from "../../../001_provider/001_AppStateProvider"

export type IndexRatioRowProps = {
}

// Slider row (0..1, step 0.1) for the RVC "indexRatio" server setting —
// presumably the blend weight for feature/index retrieval; confirm against
// the server-side pipeline.
export const IndexRatioRow = (_props: IndexRatioRowProps) => {
    const appState = useAppState()

    // Renamed from `clusterRatioRow` — that name was a copy-paste leftover
    // from the so-vits-svc clusterInferRatio row and was misleading here.
    const indexRatioRow = useMemo(() => {
        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">index ratio</div>
                <div>
                    <input type="range" className="body-item-input-slider" min="0" max="1" step="0.1" value={appState.serverSetting.serverSetting.indexRatio} onChange={(e) => {
                        appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, indexRatio: Number(e.target.value) })
                    }}></input>
                    <span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.indexRatio}</span>
                </div>
                <div className="body-button-container">
                </div>
            </div>
        )
    }, [
        appState.serverSetting.serverSetting,
        appState.serverSetting.updateServerSettings
    ])

    return indexRatioRow
}

View File

@ -110,7 +110,7 @@ export class ServerConfigurator {
}
// !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
loadModel = async (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, clusterTorchModelFilename: string | null, hubertTorchModelFilename: string | null) => {
loadModel = async (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, clusterTorchModelFilename: string | null, featureFilename: string | null, indexFilename: string | null) => {
const url = this.serverUrl + "/load_model"
const info = new Promise<ServerInfo>(async (resolve) => {
const formData = new FormData();
@ -118,7 +118,8 @@ export class ServerConfigurator {
formData.append("onnxModelFilename", onnxModelFilename || "-");
formData.append("configFilename", configFilename);
formData.append("clusterTorchModelFilename", clusterTorchModelFilename || "-");
formData.append("hubertTorchModelFilename", hubertTorchModelFilename || "-");
formData.append("featureFilename", featureFilename || "-");
formData.append("indexFilename", indexFilename || "-");
const request = new Request(url, {
method: 'POST',

View File

@ -257,9 +257,16 @@ export class VoiceChangerClient {
concatUploadedFile = (filename: string, chunkNum: number) => {
return this.configurator.concatUploadedFile(filename, chunkNum)
}
loadModel = (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, clusterTorchModelFilename: string | null, hubertTorchModelFilename: string | null) => {
loadModel = (
configFilename: string,
pyTorchModelFilename: string | null,
onnxModelFilename: string | null,
clusterTorchModelFilename: string | null,
featureFilename: string | null,
indexFilename: string | null,
) => {
// !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename, clusterTorchModelFilename, hubertTorchModelFilename)
return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename, clusterTorchModelFilename, featureFilename, indexFilename)
}
//## Worklet ##//

View File

@ -78,6 +78,8 @@ export const ServerSettingKey = {
"extraConvertSize": "extraConvertSize",
"clusterInferRatio": "clusterInferRatio",
"indexRatio": "indexRatio",
"inputSampleRate": "inputSampleRate",
} as const
export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
@ -106,6 +108,8 @@ export type VoiceChangerServerSetting = {
extraConvertSize: number// so-vits-svc
clusterInferRatio: number // so-vits-svc
indexRatio: number // RVC
inputSampleRate: InputSampleRate
}
@ -142,6 +146,8 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
extraConvertSize: 0,
clusterInferRatio: 0,
indexRatio: 0,
inputSampleRate: 24000,
//
@ -174,6 +180,8 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
extraConvertSize: 0,
clusterInferRatio: 0,
indexRatio: 0,
inputSampleRate: 24000,
//
@ -210,6 +218,8 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
inputSampleRate: 24000,
//
@ -246,6 +256,8 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
inputSampleRate: 24000,
//
@ -281,6 +293,8 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
inputSampleRate: 24000,
//
@ -318,6 +332,8 @@ export const DefaultServerSetting_RVC: ServerInfo = {
extraConvertSize: 1024 * 32,
clusterInferRatio: 0.1,
indexRatio: 0,
inputSampleRate: 24000,
//

View File

@ -16,6 +16,10 @@ export type FileUploadSetting = {
configFile: ModelData | null
clusterTorchModel: ModelData | null
hubertTorchModel: ModelData | null // !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
feature: ModelData | null //RVC
index: ModelData | null //RVC
}
const InitialFileUploadSetting: FileUploadSetting = {
@ -24,6 +28,9 @@ const InitialFileUploadSetting: FileUploadSetting = {
onnxModel: null,
clusterTorchModel: null,
hubertTorchModel: null,
feature: null,
index: null
}
export type UseServerSettingProps = {
@ -89,7 +96,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
loadCache()
}, [])
// クライアントへ設定反映 (キャッシュ反映)
// サーバへキャッシュの内容を反映 (クライアント初期化した時の一回)
useEffect(() => {
if (!props.voiceChangerClient) return
for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
@ -165,7 +172,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
setUploadProgress(0)
setIsUploading(true)
// ファイルをメモリにロード
// ファイルをメモリにロード(dataがある場合は、キャッシュから読まれていると想定しスキップ)
if (fileUploadSetting.onnxModel && !fileUploadSetting.onnxModel.data) {
fileUploadSetting.onnxModel.data = await fileUploadSetting.onnxModel.file!.arrayBuffer()
fileUploadSetting.onnxModel.filename = await fileUploadSetting.onnxModel.file!.name
@ -189,8 +196,27 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
}
}
if (fileUploadSetting.feature) {
if ((props.clientType == "RVC") && !fileUploadSetting.feature!.data) {
fileUploadSetting.feature!.data = await fileUploadSetting.feature!.file!.arrayBuffer()
fileUploadSetting.feature!.filename = await fileUploadSetting.feature!.file!.name
}
}
if (fileUploadSetting.index) {
if ((props.clientType == "RVC") && !fileUploadSetting.index!.data) {
fileUploadSetting.index!.data = await fileUploadSetting.index!.file!.arrayBuffer()
fileUploadSetting.index!.filename = await fileUploadSetting.index!.file!.name
}
}
// ファイルをサーバにアップロード
const models = [fileUploadSetting.onnxModel, fileUploadSetting.pyTorchModel, fileUploadSetting.clusterTorchModel /*, fileUploadSetting.hubertTorchModel*/].filter(x => { return x != null }) as ModelData[]
const models = [
fileUploadSetting.onnxModel,
fileUploadSetting.pyTorchModel,
fileUploadSetting.clusterTorchModel,
fileUploadSetting.feature,
fileUploadSetting.index,
].filter(x => { return x != null }) as ModelData[]
for (let i = 0; i < models.length; i++) {
const progRate = 1 / models.length
const progOffset = 100 * i * progRate
@ -208,7 +234,14 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
// !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
const configFileName = fileUploadSetting.configFile ? fileUploadSetting.configFile.filename || "-" : "-"
const loadPromise = props.voiceChangerClient.loadModel(configFileName, fileUploadSetting.pyTorchModel?.filename || null, fileUploadSetting.onnxModel?.filename || null, fileUploadSetting.clusterTorchModel?.filename || null, fileUploadSetting.hubertTorchModel?.filename || null)
const loadPromise = props.voiceChangerClient.loadModel(
configFileName,
fileUploadSetting.pyTorchModel?.filename || null,
fileUploadSetting.onnxModel?.filename || null,
fileUploadSetting.clusterTorchModel?.filename || null,
fileUploadSetting.feature?.filename || null,
fileUploadSetting.index?.filename || null
)
// サーバでロード中にキャッシュにセーブ
try {
@ -221,6 +254,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
} : null,
clusterTorchModel: fileUploadSetting.clusterTorchModel ? {
data: fileUploadSetting.clusterTorchModel.data, filename: fileUploadSetting.clusterTorchModel.filename
} : null,
feature: fileUploadSetting.feature ? {
data: fileUploadSetting.feature.data, filename: fileUploadSetting.feature.filename
} : null,
index: fileUploadSetting.index ? {
data: fileUploadSetting.index.data, filename: fileUploadSetting.index.filename
} : null
}
setItem(INDEXEDDB_KEY_MODEL_DATA, saveData)
@ -254,7 +293,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
await removeItem(INDEXEDDB_KEY_MODEL_DATA)
}
return {
serverSetting,
updateServerSettings,

View File

@ -55,17 +55,18 @@ class MMVC_Rest_Fileuploader:
onnxModelFilename: str = Form(...),
configFilename: str = Form(...),
clusterTorchModelFilename: str = Form(...),
hubertTorchModelFilename: str = Form(...),
featureFilename: str = Form(...),
indexFilename: str = Form(...),
):
print("Hubert:", hubertTorchModelFilename)
pyTorchModelFilePath = os.path.join(UPLOAD_DIR, pyTorchModelFilename) if pyTorchModelFilename != "-" else None
onnxModelFilePath = os.path.join(UPLOAD_DIR, onnxModelFilename) if onnxModelFilename != "-" else None
configFilePath = os.path.join(UPLOAD_DIR, configFilename)
clusterTorchModelFilePath = os.path.join(UPLOAD_DIR, clusterTorchModelFilename) if clusterTorchModelFilename != "-" else None
hubertTorchModelFilePath = os.path.join(UPLOAD_DIR, hubertTorchModelFilename) if hubertTorchModelFilename != "-" else None
featureFilePath = os.path.join(UPLOAD_DIR, featureFilename) if featureFilename != "-" else None
indexFilePath = os.path.join(UPLOAD_DIR, indexFilename) if indexFilename != "-" else None
info = self.voiceChangerManager.loadModel(configFilePath, pyTorchModelFilePath, onnxModelFilePath,
clusterTorchModelFilePath)
clusterTorchModelFilePath, featureFilePath, indexFilePath)
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
# return {"load": f"{configFilePath}, {pyTorchModelFilePath}, {onnxModelFilePath}"}

View File

@ -0,0 +1,55 @@
import onnxruntime
import torch
import numpy as np

# Execution providers for the ONNX session. Alternatives kept for reference:
# providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
providers = ["CPUExecutionProvider"]


class ModelWrapper:
    """Thin wrapper around an onnxruntime InferenceSession for the RVC model.

    Converts the torch tensors produced by the pipeline into numpy arrays of
    the dtypes the exported model expects (fp16 feats for half-precision
    exports, fp32 otherwise) and returns the generated audio as a torch tensor.
    """

    def __init__(self, onnx_model, is_half):
        # onnx_model: model path (or serialized bytes) passed straight to
        # InferenceSession. is_half: whether the model was exported with
        # half-precision inputs.
        self.onnx_model = onnx_model
        self.is_half = is_half
        self.onnx_session = onnxruntime.InferenceSession(
            self.onnx_model,
            providers=providers
        )

    def set_providers(self, providers, provider_options=None):
        """Re-bind the session to the given execution providers.

        provider_options defaults to [{}] (previously a mutable default
        argument, now created per call).
        """
        if provider_options is None:
            provider_options = [{}]
        self.onnx_session.set_providers(providers=providers, provider_options=provider_options)

    def get_providers(self):
        """Return the providers currently active on the session."""
        return self.onnx_session.get_providers()

    def infer(self, feats, p_len, pitch, pitchf, sid):
        """Run one inference step and return the audio as a torch tensor.

        All inputs are torch tensors; they are moved to CPU and cast to the
        dtypes matching the export. (Debug prints of the input dtypes were
        removed: each one forced a device->CPU sync on every call.)
        """
        if self.is_half:
            audio1 = self.onnx_session.run(
                ["audio"],
                {
                    "feats": feats.cpu().numpy().astype(np.float16),
                    "p_len": p_len.cpu().numpy().astype(np.int64),
                    "pitch": pitch.cpu().numpy().astype(np.int64),
                    "pitchf": pitchf.cpu().numpy().astype(np.float32),
                    "sid": sid.cpu().numpy().astype(np.int64),
                })
        else:
            audio1 = self.onnx_session.run(
                ["audio"],
                {
                    "feats": feats.cpu().numpy().astype(np.float32),
                    "p_len": p_len.cpu().numpy(),
                    "pitch": pitch.cpu().numpy(),
                    "pitchf": pitchf.cpu().numpy(),
                    "sid": sid.cpu().numpy(),
                })
        return torch.tensor(np.array(audio1))

View File

@ -1,6 +1,7 @@
import sys
import os
import resampy
from voice_changer.RVC.ModelWrapper import ModelWrapper
# avoiding parse arg error in RVC
sys.argv = ["MMVCServerSIO.py"]
@ -50,13 +51,16 @@ class RVCSettings():
onnxModelFile: str = ""
configFile: str = ""
indexRatio: float = 0
isHalf: int = 0
speakers: dict[str, int] = field(
default_factory=lambda: {}
)
# ↓mutableな物だけ列挙
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize"]
floatData = ["noiceScale", "silentThreshold", "clusterInferRatio"]
floatData = ["noiceScale", "silentThreshold", "indexRatio"]
strData = ["framework", "f0Detector"]
@ -72,18 +76,22 @@ class RVC:
self.params = params
print("RVC initialization: ", params)
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None):
self.device = torch.device("cuda", index=self.settings.gpu)
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, feature_file: str = None, index_file: str = None):
self.settings.configFile = config
self.feature_file = feature_file
self.index_file = index_file
print("featurefile", feature_file, index_file)
self.tgt_sr = 40000
try:
hubert_path = self.params["hubert"]
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
model = models[0]
model.eval()
# model = model.half()
if self.settings.isHalf:
model = model.half()
self.hubert_model = model
self.hubert_model = self.hubert_model.to(self.device)
except Exception as e:
print("EXCEPTION during loading hubert/contentvec model", e)
@ -97,22 +105,17 @@ class RVC:
if pyTorch_model_file != None:
cpt = torch.load(pyTorch_model_file, map_location="cpu")
self.tgt_sr = cpt["config"][-1]
is_half = False
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.settings.isHalf)
net_g.eval()
net_g.load_state_dict(cpt["weight"], strict=False)
# net_g = net_g.half()
if self.settings.isHalf:
net_g = net_g.half()
self.net_g = net_g
self.net_g = self.net_g.to(self.device)
# ONNXモデル生成
if onnx_model_file != None:
ort_options = onnxruntime.SessionOptions()
ort_options.intra_op_num_threads = 8
self.onnx_session = onnxruntime.InferenceSession(
onnx_model_file,
providers=providers
)
# self.onnx_session = ModelWrapper(onnx_model_file, is_half=True)
self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.settings.isHalf)
# input_info = self.onnx_session.get_inputs()
# for i in input_info:
# print("input", i)
@ -187,16 +190,54 @@ class RVC:
convertSize = convertSize + (128 - (convertSize % 128))
self.audio_buffer = self.audio_buffer[-1 * convertSize:] # 変換対象の部分だけ抽出
print("convert size", convertSize, self.audio_buffer.shape)
crop = self.audio_buffer[-1 * (inputSize + crossfadeSize):-1 * (crossfadeSize)]
rms = np.sqrt(np.square(crop).mean(axis=0))
vol = max(rms, self.prevVol * 0.0)
self.prevVol = vol
print("audio len 01,", len(self.audio_buffer))
return (self.audio_buffer, convertSize, vol)
def _onnx_inference(self, data):
pass
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
print("[Voice Changer] No onnx session.")
return np.zeros(1).astype(np.int16)
if self.settings.gpu < 0 or self.gpu_num == 0:
dev = torch.device("cpu")
else:
dev = torch.device("cuda", index=self.settings.gpu)
self.hubert_model = self.hubert_model.to(dev)
audio = data[0]
convertSize = data[1]
vol = data[2]
audio = resampy.resample(audio, self.tgt_sr, 16000)
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16)
with torch.no_grad():
vc = VC(self.tgt_sr, dev, self.settings.isHalf)
sid = 0
times = [0, 0, 0]
f0_up_key = self.settings.tran
f0_method = "pm" if self.settings.f0Detector == "dio" else "harvest"
file_index = self.index_file if self.index_file != None else ""
file_big_npy = self.feature_file if self.feature_file != None else ""
index_rate = self.settings.indexRatio
if_f0 = 1
f0_file = None
audio_out = vc.pipeline(self.hubert_model, self.onnx_session, sid, audio, times, f0_up_key, f0_method,
file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file)
result = audio_out * np.sqrt(vol)
return result
def _pyTorch_inference(self, data):
if hasattr(self, "net_g") == False or self.net_g == None:
@ -208,28 +249,32 @@ class RVC:
else:
dev = torch.device("cuda", index=self.settings.gpu)
self.hubert_model = self.hubert_model.to(dev)
self.net_g = self.net_g.to(dev)
audio = data[0]
convertSize = data[1]
vol = data[2]
print("audio len 02,", len(audio))
audio = resampy.resample(audio, self.tgt_sr, 16000)
print("audio len 03,", len(audio))
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16)
is_half = False
with torch.no_grad():
vc = VC(self.tgt_sr, dev, is_half)
vc = VC(self.tgt_sr, dev, self.settings.isHalf)
sid = 0
times = [0, 0, 0]
f0_up_key = self.settings.tran
f0_method = "pm"
file_index = ""
file_big_npy = ""
index_rate = 1
f0_method = "pm" if self.settings.f0Detector == "dio" else "harvest"
file_index = self.index_file if self.index_file != None else ""
file_big_npy = self.feature_file if self.feature_file != None else ""
index_rate = self.settings.indexRatio
if_f0 = 1
f0_file = None
print("audio len 0,", len(audio))
audio_out = vc.pipeline(self.hubert_model, self.net_g, sid, audio, times, f0_up_key, f0_method,
file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file)
result = audio_out * np.sqrt(vol)

View File

@ -77,11 +77,13 @@ class VoiceChanger():
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None):
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None, feature_file: str = None, index_file: str = None):
if self.modelType == "MMVCv15" or self.modelType == "MMVCv13":
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file)
elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c" or self.modelType == "so-vits-svc-40v2":
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, clusterTorchModel)
elif self.modelType == "RVC":
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, feature_file, index_file)
else:
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, clusterTorchModel)

View File

@ -10,8 +10,8 @@ class VoiceChangerManager():
cls._instance.voiceChanger = VoiceChanger(params)
return cls._instance
def loadModel(self, config, model, onnx_model, clusterTorchModel):
info = self.voiceChanger.loadModel(config, model, onnx_model, clusterTorchModel)
def loadModel(self, config, model, onnx_model, clusterTorchModel, feature_file, index_file):
info = self.voiceChanger.loadModel(config, model, onnx_model, clusterTorchModel, feature_file, index_file)
info["status"] = "OK"
return info