mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00
add feature and index to RVC
This commit is contained in:
parent
f68366c22a
commit
04b9bb1ac3
21
client/demo/dist/assets/gui_settings/RVC.json
vendored
21
client/demo/dist/assets/gui_settings/RVC.json
vendored
@ -34,12 +34,15 @@
|
||||
{
|
||||
"name": "modelUploader",
|
||||
"options": {
|
||||
"showConfig": true,
|
||||
"showConfig": false,
|
||||
"showOnnx": true,
|
||||
"showPyTorch": true,
|
||||
"showCorrespondence": false,
|
||||
"showPyTorchCluster": false,
|
||||
|
||||
"showFeature": true,
|
||||
"showIndex": true,
|
||||
|
||||
"defaultEnablePyTorch": true
|
||||
}
|
||||
},
|
||||
@ -83,12 +86,6 @@
|
||||
}
|
||||
],
|
||||
"speakerSetting": [
|
||||
{
|
||||
"name": "srcId",
|
||||
"options": {
|
||||
"showF0": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "dstId",
|
||||
"options": {
|
||||
@ -96,20 +93,12 @@
|
||||
"useServerInfo": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "editSpeakerIdMapping",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "f0Factor",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "tune",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "clusterInferRatio",
|
||||
"name": "indexRatio",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
|
41
client/demo/dist/index.js
vendored
41
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -34,12 +34,15 @@
|
||||
{
|
||||
"name": "modelUploader",
|
||||
"options": {
|
||||
"showConfig": true,
|
||||
"showConfig": false,
|
||||
"showOnnx": true,
|
||||
"showPyTorch": true,
|
||||
"showCorrespondence": false,
|
||||
"showPyTorchCluster": false,
|
||||
|
||||
"showFeature": true,
|
||||
"showIndex": true,
|
||||
|
||||
"defaultEnablePyTorch": true
|
||||
}
|
||||
},
|
||||
@ -83,12 +86,6 @@
|
||||
}
|
||||
],
|
||||
"speakerSetting": [
|
||||
{
|
||||
"name": "srcId",
|
||||
"options": {
|
||||
"showF0": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "dstId",
|
||||
"options": {
|
||||
@ -96,20 +93,12 @@
|
||||
"useServerInfo": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "editSpeakerIdMapping",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "f0Factor",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "tune",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "clusterInferRatio",
|
||||
"name": "indexRatio",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
|
@ -33,6 +33,7 @@ import { CrossFadeOffsetRateRow, CrossFadeOffsetRateRowProps } from "./component
|
||||
import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_CrossFadeEndRateRow"
|
||||
import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
|
||||
import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
|
||||
import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
|
||||
|
||||
export const catalog: { [key: string]: (props: any) => JSX.Element } = {}
|
||||
|
||||
@ -78,6 +79,9 @@ const initialize = () => {
|
||||
addToCatalog("clusterInferRatio", (props: ClusterInferRatioRowProps) => { return <ClusterInferRatioRow {...props} /> })
|
||||
addToCatalog("noiseScale", (props: NoiseScaleRowProps) => { return <NoiseScaleRow {...props} /> })
|
||||
addToCatalog("silentThreshold", (props: SilentThresholdRowProps) => { return <SilentThresholdRow {...props} /> })
|
||||
addToCatalog("indexRatio", (props: IndexRatioRowProps) => { return <IndexRatioRow {...props} /> })
|
||||
|
||||
|
||||
|
||||
addToCatalog("inputChunkNum", (props: InputChunkNumRowProps) => { return <InputChunkNumRow {...props} /> })
|
||||
addToCatalog("extraDataLength", (props: ExtraDataLengthRowProps) => { return <ExtraDataLengthRow {...props} /> })
|
||||
|
@ -0,0 +1,47 @@
|
||||
import React, { useMemo } from "react"
|
||||
import { fileSelector } from "@dannadori/voice-changer-client-js"
|
||||
import { useAppState } from "../../../001_provider/001_AppStateProvider"
|
||||
|
||||
|
||||
/**
 * Model-uploader row for the RVC feature file (`.npy`).
 *
 * Shows the currently selected feature file name and offers "select" and
 * "clear" buttons that write to `fileUploadSetting.feature`.
 */
export const FeatureSelectRow = () => {
    const appState = useAppState()

    const featureSelectRow = useMemo(() => {
        // Label: the entry's recorded filename wins, then the picked File's name, else blank.
        const selected = appState.serverSetting.fileUploadSetting.feature
        const featureFilenameText = selected?.filename || selected?.file?.name || ""

        // "select": open the file picker and accept the file only if its name ends with .npy.
        const onFeatureFileLoadClicked = async () => {
            const picked = await fileSelector("")
            if (!picked.name.endsWith(".npy")) {
                alert("Feature file's extension should be npy")
                return
            }
            const serverSetting = appState.serverSetting
            serverSetting.setFileUploadSetting({
                ...serverSetting.fileUploadSetting,
                feature: { file: picked },
            })
        }

        // "clear": drop the feature entry, leaving every other upload slot untouched.
        const onFeatureFileClearClicked = () => {
            const serverSetting = appState.serverSetting
            serverSetting.setFileUploadSetting({
                ...serverSetting.fileUploadSetting,
                feature: null,
            })
        }

        const buttons = (
            <div className="body-button-container">
                <div className="body-button" onClick={onFeatureFileLoadClicked}>select</div>
                <div className="body-button left-margin-1" onClick={onFeatureFileClearClicked}>clear</div>
            </div>
        )

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-2">feature(.npy)</div>
                <div className="body-item-text">
                    <div>{featureFilenameText}</div>
                </div>
                {buttons}
            </div>
        )
    }, [appState.serverSetting.fileUploadSetting, appState.serverSetting.setFileUploadSetting])

    return featureSelectRow
}
|
@ -0,0 +1,47 @@
|
||||
import React, { useMemo } from "react"
|
||||
import { fileSelector } from "@dannadori/voice-changer-client-js"
|
||||
import { useAppState } from "../../../001_provider/001_AppStateProvider"
|
||||
|
||||
|
||||
/**
 * Model-uploader row for the RVC index file (`.index`).
 *
 * Shows the currently selected index file name and offers "select" and
 * "clear" buttons that write to `fileUploadSetting.index`.
 */
export const IndexSelectRow = () => {
    const appState = useAppState()

    const indexSelectRow = useMemo(() => {
        // Label: the entry's recorded filename wins, then the picked File's name, else blank.
        const selected = appState.serverSetting.fileUploadSetting.index
        const indexFilenameText = selected?.filename || selected?.file?.name || ""

        // "select": open the file picker and accept the file only if its name ends with .index.
        const onIndexFileLoadClicked = async () => {
            const picked = await fileSelector("")
            if (!picked.name.endsWith(".index")) {
                alert("Index file's extension should be .index")
                return
            }
            const serverSetting = appState.serverSetting
            serverSetting.setFileUploadSetting({
                ...serverSetting.fileUploadSetting,
                index: { file: picked },
            })
        }

        // "clear": drop the index entry, leaving every other upload slot untouched.
        const onIndexFileClearClicked = () => {
            const serverSetting = appState.serverSetting
            serverSetting.setFileUploadSetting({
                ...serverSetting.fileUploadSetting,
                index: null,
            })
        }

        const buttons = (
            <div className="body-button-container">
                <div className="body-button" onClick={onIndexFileLoadClicked}>select</div>
                <div className="body-button left-margin-1" onClick={onIndexFileClearClicked}>clear</div>
            </div>
        )

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-2">index(.index)</div>
                <div className="body-item-text">
                    <div>{indexFilenameText}</div>
                </div>
                {buttons}
            </div>
        )
    }, [appState.serverSetting.fileUploadSetting, appState.serverSetting.setFileUploadSetting])

    return indexSelectRow
}
|
@ -13,7 +13,7 @@ export const ModelUploadButtonRow = () => {
|
||||
const uploadButtonAction = appState.serverSetting.isUploading ? () => { } : onModelUploadClicked
|
||||
const uploadButtonLabel = appState.serverSetting.isUploading ? "wait..." : "upload"
|
||||
const uploadingStatus = appState.serverSetting.isUploading ?
|
||||
appState.serverSetting.uploadProgress == 0 ? `loading model...(wait about 20sec)` : `uploading.... ${appState.serverSetting.uploadProgress}%` : ""
|
||||
appState.serverSetting.uploadProgress == 0 ? `loading model...(wait about 20sec)` : `uploading.... ${appState.serverSetting.uploadProgress.toFixed(1)}%` : ""
|
||||
|
||||
|
||||
return (
|
||||
|
@ -5,6 +5,8 @@ import { ONNXSelectRow } from "./301-2_ONNXSelectRow"
|
||||
import { PyTorchSelectRow } from "./301-3_PyTorchSelectRow"
|
||||
import { CorrespondenceSelectRow } from "./301-4_CorrespondenceSelectRow"
|
||||
import { PyTorchClusterSelectRow } from "./301-5_PyTorchClusterSelectRow"
|
||||
import { FeatureSelectRow } from "./301-6_FeatureSelectRow"
|
||||
import { IndexSelectRow } from "./301-7_IndexSelectRow"
|
||||
import { ModelUploadButtonRow } from "./301-9_ModelUploadButtonRow"
|
||||
|
||||
export type ModelUploaderRowProps = {
|
||||
@ -14,6 +16,9 @@ export type ModelUploaderRowProps = {
|
||||
showCorrespondence: boolean
|
||||
showPyTorchCluster: boolean
|
||||
|
||||
showFeature: boolean
|
||||
showIndex: boolean
|
||||
|
||||
defaultEnablePyTorch: boolean
|
||||
}
|
||||
|
||||
@ -44,6 +49,8 @@ export const ModelUploaderRow = (props: ModelUploaderRowProps) => {
|
||||
{props.showPyTorch && guiState.showPyTorchModelUpload ? <PyTorchSelectRow /> : <></>}
|
||||
{props.showCorrespondence ? <CorrespondenceSelectRow /> : <></>}
|
||||
{props.showPyTorchCluster ? <PyTorchClusterSelectRow /> : <></>}
|
||||
{props.showFeature ? <FeatureSelectRow /> : <></>}
|
||||
{props.showIndex ? <IndexSelectRow /> : <></>}
|
||||
<ModelUploadButtonRow />
|
||||
</>
|
||||
)
|
||||
|
@ -0,0 +1,30 @@
|
||||
import React, { useMemo } from "react"
|
||||
import { useAppState } from "../../../001_provider/001_AppStateProvider"
|
||||
|
||||
/** Options accepted by {@link IndexRatioRow}; currently there are none. */
export type IndexRatioRowProps = {
}

/**
 * Slider row bound to the `indexRatio` server setting.
 *
 * The slider runs from 0 to 1 in steps of 0.1; every change sends the whole
 * server-setting object back through `updateServerSettings` with the new
 * `indexRatio`, and the current value is echoed next to the slider.
 */
export const IndexRatioRow = (_props: IndexRatioRowProps) => {
    const appState = useAppState()

    const indexRatioRow = useMemo(() => {
        // Push the slider position (a string in the DOM) to the server settings as a number.
        const onIndexRatioChanged = (e: React.ChangeEvent<HTMLInputElement>) => {
            const nextRatio = Number(e.target.value)
            appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, indexRatio: nextRatio })
        }

        const currentRatio = appState.serverSetting.serverSetting.indexRatio

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">index ratio</div>
                <div>
                    <input
                        type="range"
                        className="body-item-input-slider"
                        min="0"
                        max="1"
                        step="0.1"
                        value={currentRatio}
                        onChange={onIndexRatioChanged}
                    />
                    <span className="body-item-input-slider-val">{currentRatio}</span>
                </div>
                <div className="body-button-container"></div>
            </div>
        )
    }, [
        appState.serverSetting.serverSetting,
        appState.serverSetting.updateServerSettings
    ])

    return indexRatioRow
}
|
@ -110,7 +110,7 @@ export class ServerConfigurator {
|
||||
}
|
||||
|
||||
// !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
|
||||
loadModel = async (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, clusterTorchModelFilename: string | null, hubertTorchModelFilename: string | null) => {
|
||||
loadModel = async (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, clusterTorchModelFilename: string | null, featureFilename: string | null, indexFilename: string | null) => {
|
||||
const url = this.serverUrl + "/load_model"
|
||||
const info = new Promise<ServerInfo>(async (resolve) => {
|
||||
const formData = new FormData();
|
||||
@ -118,7 +118,8 @@ export class ServerConfigurator {
|
||||
formData.append("onnxModelFilename", onnxModelFilename || "-");
|
||||
formData.append("configFilename", configFilename);
|
||||
formData.append("clusterTorchModelFilename", clusterTorchModelFilename || "-");
|
||||
formData.append("hubertTorchModelFilename", hubertTorchModelFilename || "-");
|
||||
formData.append("featureFilename", featureFilename || "-");
|
||||
formData.append("indexFilename", indexFilename || "-");
|
||||
|
||||
const request = new Request(url, {
|
||||
method: 'POST',
|
||||
|
@ -257,9 +257,16 @@ export class VoiceChangerClient {
|
||||
concatUploadedFile = (filename: string, chunkNum: number) => {
|
||||
return this.configurator.concatUploadedFile(filename, chunkNum)
|
||||
}
|
||||
loadModel = (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, clusterTorchModelFilename: string | null, hubertTorchModelFilename: string | null) => {
|
||||
loadModel = (
|
||||
configFilename: string,
|
||||
pyTorchModelFilename: string | null,
|
||||
onnxModelFilename: string | null,
|
||||
clusterTorchModelFilename: string | null,
|
||||
featureFilename: string | null,
|
||||
indexFilename: string | null,
|
||||
) => {
|
||||
// !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
|
||||
return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename, clusterTorchModelFilename, hubertTorchModelFilename)
|
||||
return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename, clusterTorchModelFilename, featureFilename, indexFilename)
|
||||
}
|
||||
|
||||
//## Worklet ##//
|
||||
|
@ -78,6 +78,8 @@ export const ServerSettingKey = {
|
||||
"extraConvertSize": "extraConvertSize",
|
||||
"clusterInferRatio": "clusterInferRatio",
|
||||
|
||||
"indexRatio": "indexRatio",
|
||||
|
||||
"inputSampleRate": "inputSampleRate",
|
||||
} as const
|
||||
export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
|
||||
@ -106,6 +108,8 @@ export type VoiceChangerServerSetting = {
|
||||
extraConvertSize: number// so-vits-svc
|
||||
clusterInferRatio: number // so-vits-svc
|
||||
|
||||
indexRatio: number // RVC
|
||||
|
||||
inputSampleRate: InputSampleRate
|
||||
}
|
||||
|
||||
@ -142,6 +146,8 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
|
||||
extraConvertSize: 0,
|
||||
clusterInferRatio: 0,
|
||||
|
||||
indexRatio: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
@ -174,6 +180,8 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
|
||||
extraConvertSize: 0,
|
||||
clusterInferRatio: 0,
|
||||
|
||||
indexRatio: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
@ -210,6 +218,8 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
|
||||
extraConvertSize: 1024 * 32,
|
||||
clusterInferRatio: 0.1,
|
||||
|
||||
indexRatio: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
@ -246,6 +256,8 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
|
||||
extraConvertSize: 1024 * 32,
|
||||
clusterInferRatio: 0.1,
|
||||
|
||||
indexRatio: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
@ -281,6 +293,8 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
|
||||
extraConvertSize: 1024 * 32,
|
||||
clusterInferRatio: 0.1,
|
||||
|
||||
indexRatio: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
@ -318,6 +332,8 @@ export const DefaultServerSetting_RVC: ServerInfo = {
|
||||
extraConvertSize: 1024 * 32,
|
||||
clusterInferRatio: 0.1,
|
||||
|
||||
indexRatio: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
|
@ -16,6 +16,10 @@ export type FileUploadSetting = {
|
||||
configFile: ModelData | null
|
||||
clusterTorchModel: ModelData | null
|
||||
hubertTorchModel: ModelData | null // !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
|
||||
|
||||
feature: ModelData | null //RVC
|
||||
index: ModelData | null //RVC
|
||||
|
||||
}
|
||||
|
||||
const InitialFileUploadSetting: FileUploadSetting = {
|
||||
@ -24,6 +28,9 @@ const InitialFileUploadSetting: FileUploadSetting = {
|
||||
onnxModel: null,
|
||||
clusterTorchModel: null,
|
||||
hubertTorchModel: null,
|
||||
|
||||
feature: null,
|
||||
index: null
|
||||
}
|
||||
|
||||
export type UseServerSettingProps = {
|
||||
@ -89,7 +96,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
loadCache()
|
||||
}, [])
|
||||
|
||||
// クライアントへ設定反映 (キャッシュ反映)
|
||||
// サーバへキャッシュの内容を反映 (クライアント初期化した時の一回)
|
||||
useEffect(() => {
|
||||
if (!props.voiceChangerClient) return
|
||||
for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
|
||||
@ -165,7 +172,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
setUploadProgress(0)
|
||||
setIsUploading(true)
|
||||
|
||||
// ファイルをメモリにロード
|
||||
// ファイルをメモリにロード(dataがある場合は、キャッシュから読まれていると想定しスキップ)
|
||||
if (fileUploadSetting.onnxModel && !fileUploadSetting.onnxModel.data) {
|
||||
fileUploadSetting.onnxModel.data = await fileUploadSetting.onnxModel.file!.arrayBuffer()
|
||||
fileUploadSetting.onnxModel.filename = await fileUploadSetting.onnxModel.file!.name
|
||||
@ -189,8 +196,27 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
}
|
||||
}
|
||||
|
||||
if (fileUploadSetting.feature) {
|
||||
if ((props.clientType == "RVC") && !fileUploadSetting.feature!.data) {
|
||||
fileUploadSetting.feature!.data = await fileUploadSetting.feature!.file!.arrayBuffer()
|
||||
fileUploadSetting.feature!.filename = await fileUploadSetting.feature!.file!.name
|
||||
}
|
||||
}
|
||||
if (fileUploadSetting.index) {
|
||||
if ((props.clientType == "RVC") && !fileUploadSetting.index!.data) {
|
||||
fileUploadSetting.index!.data = await fileUploadSetting.index!.file!.arrayBuffer()
|
||||
fileUploadSetting.index!.filename = await fileUploadSetting.index!.file!.name
|
||||
}
|
||||
}
|
||||
|
||||
// ファイルをサーバにアップロード
|
||||
const models = [fileUploadSetting.onnxModel, fileUploadSetting.pyTorchModel, fileUploadSetting.clusterTorchModel /*, fileUploadSetting.hubertTorchModel*/].filter(x => { return x != null }) as ModelData[]
|
||||
const models = [
|
||||
fileUploadSetting.onnxModel,
|
||||
fileUploadSetting.pyTorchModel,
|
||||
fileUploadSetting.clusterTorchModel,
|
||||
fileUploadSetting.feature,
|
||||
fileUploadSetting.index,
|
||||
].filter(x => { return x != null }) as ModelData[]
|
||||
for (let i = 0; i < models.length; i++) {
|
||||
const progRate = 1 / models.length
|
||||
const progOffset = 100 * i * progRate
|
||||
@ -208,7 +234,14 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
|
||||
// !! 注意!! hubertTorchModelは固定値で上書きされるため、設定しても効果ない。
|
||||
const configFileName = fileUploadSetting.configFile ? fileUploadSetting.configFile.filename || "-" : "-"
|
||||
const loadPromise = props.voiceChangerClient.loadModel(configFileName, fileUploadSetting.pyTorchModel?.filename || null, fileUploadSetting.onnxModel?.filename || null, fileUploadSetting.clusterTorchModel?.filename || null, fileUploadSetting.hubertTorchModel?.filename || null)
|
||||
const loadPromise = props.voiceChangerClient.loadModel(
|
||||
configFileName,
|
||||
fileUploadSetting.pyTorchModel?.filename || null,
|
||||
fileUploadSetting.onnxModel?.filename || null,
|
||||
fileUploadSetting.clusterTorchModel?.filename || null,
|
||||
fileUploadSetting.feature?.filename || null,
|
||||
fileUploadSetting.index?.filename || null
|
||||
)
|
||||
|
||||
// サーバでロード中にキャッシュにセーブ
|
||||
try {
|
||||
@ -221,6 +254,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
} : null,
|
||||
clusterTorchModel: fileUploadSetting.clusterTorchModel ? {
|
||||
data: fileUploadSetting.clusterTorchModel.data, filename: fileUploadSetting.clusterTorchModel.filename
|
||||
} : null,
|
||||
feature: fileUploadSetting.feature ? {
|
||||
data: fileUploadSetting.feature.data, filename: fileUploadSetting.feature.filename
|
||||
} : null,
|
||||
index: fileUploadSetting.index ? {
|
||||
data: fileUploadSetting.index.data, filename: fileUploadSetting.index.filename
|
||||
} : null
|
||||
}
|
||||
setItem(INDEXEDDB_KEY_MODEL_DATA, saveData)
|
||||
@ -254,7 +293,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
await removeItem(INDEXEDDB_KEY_MODEL_DATA)
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
serverSetting,
|
||||
updateServerSettings,
|
||||
|
@ -55,17 +55,18 @@ class MMVC_Rest_Fileuploader:
|
||||
onnxModelFilename: str = Form(...),
|
||||
configFilename: str = Form(...),
|
||||
clusterTorchModelFilename: str = Form(...),
|
||||
hubertTorchModelFilename: str = Form(...),
|
||||
featureFilename: str = Form(...),
|
||||
indexFilename: str = Form(...),
|
||||
):
|
||||
print("Hubert:", hubertTorchModelFilename)
|
||||
pyTorchModelFilePath = os.path.join(UPLOAD_DIR, pyTorchModelFilename) if pyTorchModelFilename != "-" else None
|
||||
onnxModelFilePath = os.path.join(UPLOAD_DIR, onnxModelFilename) if onnxModelFilename != "-" else None
|
||||
configFilePath = os.path.join(UPLOAD_DIR, configFilename)
|
||||
clusterTorchModelFilePath = os.path.join(UPLOAD_DIR, clusterTorchModelFilename) if clusterTorchModelFilename != "-" else None
|
||||
hubertTorchModelFilePath = os.path.join(UPLOAD_DIR, hubertTorchModelFilename) if hubertTorchModelFilename != "-" else None
|
||||
featureFilePath = os.path.join(UPLOAD_DIR, featureFilename) if featureFilename != "-" else None
|
||||
indexFilePath = os.path.join(UPLOAD_DIR, indexFilename) if indexFilename != "-" else None
|
||||
|
||||
info = self.voiceChangerManager.loadModel(configFilePath, pyTorchModelFilePath, onnxModelFilePath,
|
||||
clusterTorchModelFilePath)
|
||||
clusterTorchModelFilePath, featureFilePath, indexFilePath)
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
# return {"load": f"{configFilePath}, {pyTorchModelFilePath}, {onnxModelFilePath}"}
|
||||
|
55
server/voice_changer/RVC/ModelWrapper.py
Normal file
55
server/voice_changer/RVC/ModelWrapper.py
Normal file
@ -0,0 +1,55 @@
|
||||
import onnxruntime
|
||||
import torch
|
||||
import numpy as np
|
||||
# providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
|
||||
providers = ["CPUExecutionProvider"]
|
||||
|
||||
|
||||
class ModelWrapper:
    """Thin adapter that runs an RVC ONNX model through onnxruntime.

    The wrapper owns an ``onnxruntime.InferenceSession`` and converts the
    torch tensors handed to :meth:`infer` into the numpy arrays the session
    expects, returning the result as a torch tensor again.
    """

    def __init__(self, onnx_model, is_half):
        """Create the inference session.

        Args:
            onnx_model: Model argument forwarded unchanged to
                ``onnxruntime.InferenceSession``.
            is_half: Truthy when the model takes half-precision features;
                selects the dtype conversions applied in :meth:`infer`.
        """
        self.onnx_model = onnx_model
        self.is_half = is_half

        # The session starts on the module-level default ``providers``;
        # callers can switch later through set_providers().
        self.onnx_session = onnxruntime.InferenceSession(
            self.onnx_model,
            providers=providers
        )

    def set_providers(self, providers, provider_options=None):
        """Switch the execution providers of the underlying session.

        Args:
            providers: Provider names forwarded to the session.
            provider_options: Per-provider option dicts. Defaults to ``[{}]``;
                a fresh list is built on every call so the default can never
                be shared or mutated across calls.
        """
        if provider_options is None:
            provider_options = [{}]
        self.onnx_session.set_providers(providers=providers, provider_options=provider_options)

    def get_providers(self):
        """Return the providers currently reported by the session."""
        return self.onnx_session.get_providers()

    def infer(self, feats, p_len, pitch, pitchf, sid):
        """Run the model and return its ``"audio"`` output.

        All arguments are torch tensors; each is moved to the CPU and turned
        into a numpy array before being fed to the session.

        In half mode ``feats`` is cast to float16, ``p_len``/``pitch``/``sid``
        to int64 and ``pitchf`` to float32. Otherwise only ``feats`` is cast
        (to float32) and the remaining inputs keep their own dtype.

        Returns:
            ``torch.tensor`` built from ``np.array`` of the value returned by
            ``session.run(["audio"], ...)``.
        """
        if self.is_half:
            feed = {
                "feats": feats.cpu().numpy().astype(np.float16),
                "p_len": p_len.cpu().numpy().astype(np.int64),
                "pitch": pitch.cpu().numpy().astype(np.int64),
                "pitchf": pitchf.cpu().numpy().astype(np.float32),
                "sid": sid.cpu().numpy().astype(np.int64),
            }
        else:
            feed = {
                "feats": feats.cpu().numpy().astype(np.float32),
                "p_len": p_len.cpu().numpy(),
                "pitch": pitch.cpu().numpy(),
                "pitchf": pitchf.cpu().numpy(),
                "sid": sid.cpu().numpy(),
            }

        # Single run call for both precisions; only the feed differs.
        audio1 = self.onnx_session.run(["audio"], feed)

        return torch.tensor(np.array(audio1))
|
@ -1,6 +1,7 @@
|
||||
import sys
|
||||
import os
|
||||
import resampy
|
||||
from voice_changer.RVC.ModelWrapper import ModelWrapper
|
||||
|
||||
# avoiding parse arg error in RVC
|
||||
sys.argv = ["MMVCServerSIO.py"]
|
||||
@ -50,13 +51,16 @@ class RVCSettings():
|
||||
onnxModelFile: str = ""
|
||||
configFile: str = ""
|
||||
|
||||
indexRatio: float = 0
|
||||
isHalf: int = 0
|
||||
|
||||
speakers: dict[str, int] = field(
|
||||
default_factory=lambda: {}
|
||||
)
|
||||
|
||||
# ↓mutableな物だけ列挙
|
||||
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize"]
|
||||
floatData = ["noiceScale", "silentThreshold", "clusterInferRatio"]
|
||||
floatData = ["noiceScale", "silentThreshold", "indexRatio"]
|
||||
strData = ["framework", "f0Detector"]
|
||||
|
||||
|
||||
@ -72,18 +76,22 @@ class RVC:
|
||||
self.params = params
|
||||
print("RVC initialization: ", params)
|
||||
|
||||
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None):
|
||||
self.device = torch.device("cuda", index=self.settings.gpu)
|
||||
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, feature_file: str = None, index_file: str = None):
|
||||
self.settings.configFile = config
|
||||
self.feature_file = feature_file
|
||||
self.index_file = index_file
|
||||
|
||||
print("featurefile", feature_file, index_file)
|
||||
|
||||
self.tgt_sr = 40000
|
||||
try:
|
||||
hubert_path = self.params["hubert"]
|
||||
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([hubert_path], suffix="",)
|
||||
model = models[0]
|
||||
model.eval()
|
||||
# model = model.half()
|
||||
if self.settings.isHalf:
|
||||
model = model.half()
|
||||
self.hubert_model = model
|
||||
self.hubert_model = self.hubert_model.to(self.device)
|
||||
|
||||
except Exception as e:
|
||||
print("EXCEPTION during loading hubert/contentvec model", e)
|
||||
@ -97,22 +105,17 @@ class RVC:
|
||||
if pyTorch_model_file != None:
|
||||
cpt = torch.load(pyTorch_model_file, map_location="cpu")
|
||||
self.tgt_sr = cpt["config"][-1]
|
||||
is_half = False
|
||||
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
|
||||
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=self.settings.isHalf)
|
||||
net_g.eval()
|
||||
net_g.load_state_dict(cpt["weight"], strict=False)
|
||||
# net_g = net_g.half()
|
||||
if self.settings.isHalf:
|
||||
net_g = net_g.half()
|
||||
self.net_g = net_g
|
||||
self.net_g = self.net_g.to(self.device)
|
||||
|
||||
# ONNXモデル生成
|
||||
if onnx_model_file != None:
|
||||
ort_options = onnxruntime.SessionOptions()
|
||||
ort_options.intra_op_num_threads = 8
|
||||
self.onnx_session = onnxruntime.InferenceSession(
|
||||
onnx_model_file,
|
||||
providers=providers
|
||||
)
|
||||
# self.onnx_session = ModelWrapper(onnx_model_file, is_half=True)
|
||||
self.onnx_session = ModelWrapper(onnx_model_file, is_half=self.settings.isHalf)
|
||||
# input_info = self.onnx_session.get_inputs()
|
||||
# for i in input_info:
|
||||
# print("input", i)
|
||||
@ -187,16 +190,54 @@ class RVC:
|
||||
convertSize = convertSize + (128 - (convertSize % 128))
|
||||
|
||||
self.audio_buffer = self.audio_buffer[-1 * convertSize:] # 変換対象の部分だけ抽出
|
||||
print("convert size", convertSize, self.audio_buffer.shape)
|
||||
|
||||
crop = self.audio_buffer[-1 * (inputSize + crossfadeSize):-1 * (crossfadeSize)]
|
||||
rms = np.sqrt(np.square(crop).mean(axis=0))
|
||||
vol = max(rms, self.prevVol * 0.0)
|
||||
self.prevVol = vol
|
||||
|
||||
print("audio len 01,", len(self.audio_buffer))
|
||||
return (self.audio_buffer, convertSize, vol)
|
||||
|
||||
def _onnx_inference(self, data):
|
||||
pass
|
||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||
print("[Voice Changer] No onnx session.")
|
||||
return np.zeros(1).astype(np.int16)
|
||||
|
||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
||||
dev = torch.device("cpu")
|
||||
else:
|
||||
dev = torch.device("cuda", index=self.settings.gpu)
|
||||
|
||||
self.hubert_model = self.hubert_model.to(dev)
|
||||
|
||||
audio = data[0]
|
||||
convertSize = data[1]
|
||||
vol = data[2]
|
||||
|
||||
audio = resampy.resample(audio, self.tgt_sr, 16000)
|
||||
|
||||
if vol < self.settings.silentThreshold:
|
||||
return np.zeros(convertSize).astype(np.int16)
|
||||
|
||||
with torch.no_grad():
|
||||
vc = VC(self.tgt_sr, dev, self.settings.isHalf)
|
||||
sid = 0
|
||||
times = [0, 0, 0]
|
||||
f0_up_key = self.settings.tran
|
||||
f0_method = "pm" if self.settings.f0Detector == "dio" else "harvest"
|
||||
file_index = self.index_file if self.index_file != None else ""
|
||||
file_big_npy = self.feature_file if self.feature_file != None else ""
|
||||
index_rate = self.settings.indexRatio
|
||||
if_f0 = 1
|
||||
f0_file = None
|
||||
|
||||
audio_out = vc.pipeline(self.hubert_model, self.onnx_session, sid, audio, times, f0_up_key, f0_method,
|
||||
file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file)
|
||||
result = audio_out * np.sqrt(vol)
|
||||
|
||||
return result
|
||||
|
||||
def _pyTorch_inference(self, data):
|
||||
if hasattr(self, "net_g") == False or self.net_g == None:
|
||||
@ -208,28 +249,32 @@ class RVC:
|
||||
else:
|
||||
dev = torch.device("cuda", index=self.settings.gpu)
|
||||
|
||||
self.hubert_model = self.hubert_model.to(dev)
|
||||
self.net_g = self.net_g.to(dev)
|
||||
|
||||
audio = data[0]
|
||||
convertSize = data[1]
|
||||
vol = data[2]
|
||||
|
||||
print("audio len 02,", len(audio))
|
||||
audio = resampy.resample(audio, self.tgt_sr, 16000)
|
||||
print("audio len 03,", len(audio))
|
||||
|
||||
if vol < self.settings.silentThreshold:
|
||||
return np.zeros(convertSize).astype(np.int16)
|
||||
|
||||
is_half = False
|
||||
with torch.no_grad():
|
||||
vc = VC(self.tgt_sr, dev, is_half)
|
||||
vc = VC(self.tgt_sr, dev, self.settings.isHalf)
|
||||
sid = 0
|
||||
times = [0, 0, 0]
|
||||
f0_up_key = self.settings.tran
|
||||
f0_method = "pm"
|
||||
file_index = ""
|
||||
file_big_npy = ""
|
||||
index_rate = 1
|
||||
f0_method = "pm" if self.settings.f0Detector == "dio" else "harvest"
|
||||
file_index = self.index_file if self.index_file != None else ""
|
||||
file_big_npy = self.feature_file if self.feature_file != None else ""
|
||||
index_rate = self.settings.indexRatio
|
||||
if_f0 = 1
|
||||
f0_file = None
|
||||
|
||||
print("audio len 0,", len(audio))
|
||||
audio_out = vc.pipeline(self.hubert_model, self.net_g, sid, audio, times, f0_up_key, f0_method,
|
||||
file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file)
|
||||
result = audio_out * np.sqrt(vol)
|
||||
|
@ -77,11 +77,13 @@ class VoiceChanger():
|
||||
|
||||
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
|
||||
|
||||
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None):
|
||||
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None, feature_file: str = None, index_file: str = None):
|
||||
if self.modelType == "MMVCv15" or self.modelType == "MMVCv13":
|
||||
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file)
|
||||
elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c" or self.modelType == "so-vits-svc-40v2":
|
||||
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, clusterTorchModel)
|
||||
elif self.modelType == "RVC":
|
||||
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, feature_file, index_file)
|
||||
else:
|
||||
return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, clusterTorchModel)
|
||||
|
||||
|
@ -10,8 +10,8 @@ class VoiceChangerManager():
|
||||
cls._instance.voiceChanger = VoiceChanger(params)
|
||||
return cls._instance
|
||||
|
||||
def loadModel(self, config, model, onnx_model, clusterTorchModel):
|
||||
info = self.voiceChanger.loadModel(config, model, onnx_model, clusterTorchModel)
|
||||
def loadModel(self, config, model, onnx_model, clusterTorchModel, feature_file, index_file):
|
||||
info = self.voiceChanger.loadModel(config, model, onnx_model, clusterTorchModel, feature_file, index_file)
|
||||
info["status"] = "OK"
|
||||
return info
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user