Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-02-02 16:23:58 +03:00)

Commit be42bb682d (parent 328ea46161): WIP: refactoring

3  .gitignore (vendored)
@@ -43,12 +43,15 @@ docker/cudnn/
server/pretrain/
server/weights/
server/model_dir/
server/model_dir2/
server/weights_/
server/weights__/
server/models/
server/samples.json
server/samples_0003_t.json
server/samples_0003_o.json
server/samples_0003_o2.json
server/samples_0003_t2.json

server/test_official_v1_v2.json
server/test_ddpn_v1_v2.json
11  client/demo/dist/index.html (vendored)
@@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>
1904  client/demo/dist/index.js (vendored)
File diff suppressed because one or more lines are too long
31  client/demo/dist/index.js.LICENSE.txt (vendored)
@@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */

/**
 * @license React
 * react-dom.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

/**
 * @license React
 * react.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

/**
 * @license React
 * scheduler.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
@ -27,7 +27,6 @@ export const useMessageBuilder_old = (): MessageBuilderStateAndMethod => {
|
||||
lang = "en"
|
||||
}
|
||||
|
||||
console.log(messagesRef.current)
|
||||
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
|
||||
}
|
||||
return {
|
||||
|
@ -39,7 +39,7 @@ export const ModelSlotManagerDialog = () => {
|
||||
if (mode != "localFile") {
|
||||
return <></>
|
||||
}
|
||||
if (!serverSetting.serverSetting.modelSlots) {
|
||||
if (!serverSetting.serverSetting.slotInfos) {
|
||||
return <></>
|
||||
}
|
||||
|
||||
@ -114,7 +114,7 @@ export const ModelSlotManagerDialog = () => {
|
||||
}
|
||||
|
||||
|
||||
const slots = serverSetting.serverSetting.modelSlots.map((x, index) => {
|
||||
const slots = serverSetting.serverSetting.slotInfos.map((x, index) => {
|
||||
let modelFileName = ""
|
||||
if (uploadData?.slot == index) {
|
||||
modelFileName = (uploadData.model?.name || "").replace(/^.*[\\\/]/, '')
|
||||
@ -248,7 +248,7 @@ export const ModelSlotManagerDialog = () => {
|
||||
|
||||
}, [
|
||||
mode,
|
||||
serverSetting.serverSetting.modelSlots,
|
||||
serverSetting.serverSetting.slotInfos,
|
||||
serverSetting.fileUploadSettings,
|
||||
serverSetting.uploadProgress,
|
||||
serverSetting.setFileUploadSetting,
|
||||
|
@ -13,19 +13,22 @@ export const MergeLabDialog = () => {
|
||||
|
||||
// スロットが変更されたときの初期化処理
|
||||
const newSlotChangeKey = useMemo(() => {
|
||||
if (!serverSetting.serverSetting.modelSlots) {
|
||||
if (!serverSetting.serverSetting.slotInfos) {
|
||||
return ""
|
||||
}
|
||||
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => {
|
||||
return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
|
||||
return prev + "_" + cur.modelFile
|
||||
}, "")
|
||||
}, [serverSetting.serverSetting.modelSlots])
|
||||
}, [serverSetting.serverSetting.slotInfos])
|
||||
|
||||
const filterItems = useMemo(() => {
|
||||
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => {
|
||||
return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
|
||||
const key = `${cur.modelType},${cur.samplingRate},${cur.embChannels}`
|
||||
const val = { type: cur.modelType, samplingRate: cur.samplingRate, embChannels: cur.embChannels }
|
||||
const existKeys = Object.keys(prev)
|
||||
if (cur.voiceChangerType == null) {
|
||||
return prev
|
||||
}
|
||||
if (cur.modelFile.length == 0) {
|
||||
return prev
|
||||
}
|
||||
@ -41,7 +44,7 @@ export const MergeLabDialog = () => {
|
||||
}, [newSlotChangeKey])
|
||||
|
||||
const models = useMemo(() => {
|
||||
return serverSetting.serverSetting.modelSlots.filter(x => {
|
||||
return serverSetting.serverSetting.slotInfos.filter(x => {
|
||||
const filterVals = filterItems[currentFilter]
|
||||
if (!filterVals) {
|
||||
return false
|
||||
|
@@ -9,11 +9,11 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
    const appState = useAppState()

    const modelSwitchRow = useMemo(() => {
        const slot = appState.serverSetting.serverSetting.modelSlotIndex
        const slot = appState.serverSetting.serverSetting.slotIndex
        const onSwitchModelClicked = async (slot: number) => {
            // Quick hack for when the same slot is selected. The last three digits are the actual slot ID.
            const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + slot
            await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex })
            await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
            setTimeout(() => { // quick hack
                appState.getInfo()
            }, 1000 * 2)
@@ -24,7 +24,7 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
        }


        const modelSlots = appState.serverSetting.serverSetting.modelSlots
        const modelSlots = appState.serverSetting.serverSetting.slotInfos
        let options: React.JSX.Element[] = []
        if (modelSlots) {
            options = modelSlots.map((x, index) => {
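The dummy index trick above packs a timestamp and the slot number into a single value so that re-selecting the currently active slot still triggers a settings update; per the comment, the last three digits carry the real slot ID. A minimal sketch of that encoding and a matching decode, assuming the receiving side keeps only the last three digits (an assumption, not shown in this diff):

import time

def encode_dummy_slot_index(slot: int) -> int:
    # seconds since epoch shifted left by three decimal digits, plus the slot id
    return int(time.time()) * 1000 + slot

def decode_slot_index(dummy: int) -> int:
    # assumption: only the last three digits are the actual slot id
    return dummy % 1000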
@ -18,14 +18,17 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
|
||||
|
||||
|
||||
const modelTiles = useMemo(() => {
|
||||
if (!serverSetting.serverSetting.modelSlots) {
|
||||
if (!serverSetting.serverSetting.slotInfos) {
|
||||
return []
|
||||
}
|
||||
return serverSetting.serverSetting.modelSlots.map((x, index) => {
|
||||
return serverSetting.serverSetting.slotInfos.map((x, index) => {
|
||||
if (x.voiceChangerType == null) {
|
||||
return null
|
||||
}
|
||||
if (x.modelFile.length == 0) {
|
||||
return null
|
||||
}
|
||||
const tileContainerClass = index == serverSetting.serverSetting.modelSlotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container"
|
||||
const tileContainerClass = index == serverSetting.serverSetting.slotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container"
|
||||
const name = x.name.length > 8 ? x.name.substring(0, 7) + "..." : x.name
|
||||
const iconElem = x.iconFile.length > 0 ?
|
||||
<img className="model-slot-tile-icon" src={x.iconFile} alt={x.name} /> :
|
||||
@ -33,7 +36,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
|
||||
|
||||
const clickAction = async () => {
|
||||
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + index
|
||||
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex })
|
||||
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
|
||||
setTimeout(() => { // quick hack
|
||||
getInfo()
|
||||
}, 1000 * 2)
|
||||
@ -50,7 +53,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
|
||||
</div >
|
||||
)
|
||||
}).filter(x => x != null)
|
||||
}, [serverSetting.serverSetting.modelSlots, serverSetting.serverSetting.modelSlotIndex])
|
||||
}, [serverSetting.serverSetting.slotInfos, serverSetting.serverSetting.slotIndex])
|
||||
|
||||
|
||||
const modelSlotArea = useMemo(() => {
|
||||
|
@ -21,11 +21,12 @@ export const CharacterArea = (_props: CharacterAreaProps) => {
|
||||
}, [])
|
||||
|
||||
const selected = useMemo(() => {
|
||||
if (serverSetting.serverSetting.modelSlotIndex == undefined) {
|
||||
console.log("serverSetting.serverSetting.slotInfos::", serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos)
|
||||
if (serverSetting.serverSetting.slotIndex == undefined) {
|
||||
return
|
||||
}
|
||||
return serverSetting.serverSetting.modelSlots[serverSetting.serverSetting.modelSlotIndex]
|
||||
}, [serverSetting.serverSetting.modelSlotIndex, serverSetting.serverSetting.modelSlots])
|
||||
return serverSetting.serverSetting.slotInfos[serverSetting.serverSetting.slotIndex]
|
||||
}, [serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos])
|
||||
|
||||
|
||||
useEffect(() => {
|
||||
|
@ -27,7 +27,6 @@ export const useMessageBuilder = (): MessageBuilderStateAndMethod => {
|
||||
lang = "en"
|
||||
}
|
||||
|
||||
console.log(messagesRef.current)
|
||||
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
|
||||
}
|
||||
return {
|
||||
|
@@ -1,4 +1,4 @@
import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey } from "./const";
import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey, VoiceChangerType } from "./const";


type FileChunk = {
@@ -166,16 +166,12 @@ export class ServerConfigurator {
        })
    }

    loadModel = async (slot: number, isHalf: boolean, params: string = "{}") => {
        if (isHalf == undefined || isHalf == null) {
            console.warn("isHalf is invalid value", isHalf)
            isHalf = false
        }
    loadModel = async (slot: number, voiceChangerType: VoiceChangerType, params: string = "{}") => {
        const url = this.serverUrl + "/load_model"
        const info = new Promise<ServerInfo>(async (resolve) => {
            const formData = new FormData();
            formData.append("slot", "" + slot);
            formData.append("isHalf", "" + isHalf);
            formData.append("voiceChangerType", voiceChangerType);
            formData.append("params", params);

            const request = new Request(url, {
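After this refactoring, loadModel posts three multipart form fields to /load_model: slot, voiceChangerType, and params (a JSON string). A minimal sketch of the equivalent call from Python with the requests library, assuming a local server on the default port 18888; the file name is hypothetical and the exact shape of "files" follows the server-side post_load_model handler shown later in this diff, while the client-side payload is still being reworked in this WIP commit:

import json
import requests  # third-party: pip install requests

params = json.dumps({
    "defaultTune": 0,
    "defaultIndexRatio": 1,
    "defaultProtect": 0.5,
    "sampleId": "",
    "files": {"rvcModel": "example.pth"},  # hypothetical uploaded file name
})
res = requests.post(
    "http://localhost:18888/load_model",
    data={"slot": "0", "voiceChangerType": "RVC", "params": params},
)
print(res.json())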
@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
|
||||
import workerjs from "raw-loader!../worklet/dist/index.js";
|
||||
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
|
||||
import { createDummyMediaStream, validateUrl } from "./util";
|
||||
import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, WorkletNodeSetting, WorkletSetting } from "./const";
|
||||
import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, VoiceChangerType, WorkletNodeSetting, WorkletSetting } from "./const";
|
||||
import { ServerConfigurator } from "./ServerConfigurator";
|
||||
|
||||
// オーディオデータの流れ
|
||||
@ -298,10 +298,10 @@ export class VoiceChangerClient {
|
||||
}
|
||||
loadModel = (
|
||||
slot: number,
|
||||
isHalf: boolean,
|
||||
voiceChangerType: VoiceChangerType,
|
||||
params: string,
|
||||
) => {
|
||||
return this.configurator.loadModel(slot, isHalf, params)
|
||||
return this.configurator.loadModel(slot, voiceChangerType, params)
|
||||
}
|
||||
uploadAssets = (params: string) => {
|
||||
return this.configurator.uploadAssets(params)
|
||||
|
@@ -16,6 +16,16 @@ export const ClientType = {
} as const
export type ClientType = typeof ClientType[keyof typeof ClientType]

export const VoiceChangerType = {
    "MMVCv15": "MMVCv15",
    "MMVCv13": "MMVCv13",
    "so-vits-svc-40": "so-vits-svc-40",
    "DDSP-SVC": "DDSP-SVC",
    "RVC": "RVC"
} as const
export type VoiceChangerType = typeof VoiceChangerType[keyof typeof VoiceChangerType]


///////////////////////
// Server settings
///////////////////////
@ -122,7 +132,8 @@ export const ServerSettingKey = {
|
||||
"rvcQuality": "rvcQuality",
|
||||
"modelSamplingRate": "modelSamplingRate",
|
||||
"silenceFront": "silenceFront",
|
||||
"modelSlotIndex": "modelSlotIndex",
|
||||
// "modelSlotIndex": "modelSlotIndex",
|
||||
"slotIndex": "slotIndex",
|
||||
|
||||
"useEnhancer": "useEnhancer",
|
||||
"useDiff": "useDiff",
|
||||
@ -181,7 +192,8 @@ export type VoiceChangerServerSetting = {
|
||||
rvcQuality: number // 0:low, 1:high
|
||||
silenceFront: number // 0:off, 1:on
|
||||
modelSamplingRate: ModelSamplingRate // 32000,40000,48000
|
||||
modelSlotIndex: number,
|
||||
// modelSlotIndex: number,
|
||||
slotIndex: number,
|
||||
|
||||
useEnhancer: number// DDSP-SVC
|
||||
useDiff: number// DDSP-SVC
|
||||
@ -197,7 +209,31 @@ export type VoiceChangerServerSetting = {
|
||||
enableDirectML: number
|
||||
}
|
||||
|
||||
type ModelSlot = {
|
||||
// type ModelSlot = {
|
||||
// modelFile: string
|
||||
// featureFile: string,
|
||||
// indexFile: string,
|
||||
|
||||
// defaultTune: number,
|
||||
// defaultIndexRatio: number,
|
||||
// defaultProtect: number,
|
||||
|
||||
// modelType: ModelType,
|
||||
// embChannels: number,
|
||||
// f0: boolean,
|
||||
// samplingRate: number
|
||||
// deprecated: boolean
|
||||
|
||||
|
||||
// name: string,
|
||||
// description: string,
|
||||
// credit: string,
|
||||
// termsOfUseUrl: string,
|
||||
// iconFile: string
|
||||
// }
|
||||
|
||||
type SlotInfo = {
|
||||
voiceChangerType: VoiceChangerType | null
|
||||
modelFile: string
|
||||
featureFile: string,
|
||||
indexFile: string,
|
||||
@ -233,7 +269,8 @@ export type ServerInfo = VoiceChangerServerSetting & {
|
||||
pyTorchModelFile: string,
|
||||
onnxModelFile: string,
|
||||
onnxExecutionProviders: OnnxExecutionProvider[]
|
||||
modelSlots: ModelSlot[]
|
||||
// modelSlots: ModelSlot[]
|
||||
slotInfos: SlotInfo[]
|
||||
serverAudioInputDevices: ServerAudioDevice[]
|
||||
serverAudioOutputDevices: ServerAudioDevice[]
|
||||
sampleModels: RVCSampleModel[]
|
||||
@ -311,7 +348,8 @@ export const DefaultServerSetting: ServerInfo = {
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
silenceFront: 1,
|
||||
modelSlotIndex: 0,
|
||||
// modelSlotIndex: 0,
|
||||
slotIndex: 0,
|
||||
sampleModels: [],
|
||||
gpus: [],
|
||||
|
||||
@ -331,7 +369,8 @@ export const DefaultServerSetting: ServerInfo = {
|
||||
pyTorchModelFile: "",
|
||||
onnxModelFile: "",
|
||||
onnxExecutionProviders: [],
|
||||
modelSlots: [],
|
||||
// modelSlots: [],
|
||||
slotInfos: [],
|
||||
serverAudioInputDevices: [],
|
||||
serverAudioOutputDevices: []
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { useState, useMemo, useEffect } from "react"
|
||||
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, Framework, MergeModelRequest } from "../const"
|
||||
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, MergeModelRequest, VoiceChangerType } from "../const"
|
||||
import { VoiceChangerClient } from "../VoiceChangerClient"
|
||||
import { useIndexedDB } from "./useIndexedDB"
|
||||
import { ModelLoadException } from "../exceptions"
|
||||
@ -18,12 +18,7 @@ export type ModelAssetName = typeof ModelAssetName[keyof typeof ModelAssetName]
|
||||
|
||||
|
||||
export type FileUploadSetting = {
|
||||
isHalf: boolean
|
||||
uploaded: boolean
|
||||
defaultTune: number
|
||||
defaultIndexRatio: number
|
||||
defaultProtect: number
|
||||
framework: Framework
|
||||
params: string
|
||||
|
||||
mmvcv13Config: ModelData | null
|
||||
@ -52,12 +47,7 @@ export type FileUploadSetting = {
|
||||
}
|
||||
|
||||
export const InitialFileUploadSetting: FileUploadSetting = {
|
||||
isHalf: true,
|
||||
uploaded: false,
|
||||
defaultTune: 0,
|
||||
defaultIndexRatio: 1,
|
||||
defaultProtect: 0.5,
|
||||
framework: Framework.PyTorch,
|
||||
params: "{}",
|
||||
|
||||
mmvcv13Config: null,
|
||||
@ -400,43 +390,39 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
}
|
||||
|
||||
// const configFileName = fileUploadSetting.configFile?.filename || "-"
|
||||
const files: { kind: string, name: string }[] = []
|
||||
if (fileUploadSetting.mmvcv13Config?.filename) files.push({ kind: "mmvcv13Config", name: fileUploadSetting.mmvcv13Config.filename })
|
||||
if (fileUploadSetting.mmvcv13Model?.filename) files.push({ kind: "mmvcv13Model", name: fileUploadSetting.mmvcv13Model.filename })
|
||||
|
||||
if (fileUploadSetting.mmvcv15Config?.filename) files.push({ kind: "mmvcv15Config", name: fileUploadSetting.mmvcv15Config.filename })
|
||||
if (fileUploadSetting.mmvcv15Model?.filename) files.push({ kind: "mmvcv15Model", name: fileUploadSetting.mmvcv15Model.filename })
|
||||
|
||||
if (fileUploadSetting.soVitsSvc40Config?.filename) files.push({ kind: "soVitsSvc40Config", name: fileUploadSetting.soVitsSvc40Config.filename })
|
||||
if (fileUploadSetting.soVitsSvc40Model?.filename) files.push({ kind: "soVitsSvc40Model", name: fileUploadSetting.soVitsSvc40Model.filename })
|
||||
if (fileUploadSetting.soVitsSvc40Cluster?.filename) files.push({ kind: "soVitsSvc40Cluster", name: fileUploadSetting.soVitsSvc40Cluster.filename })
|
||||
|
||||
if (fileUploadSetting.rvcModel?.filename) files.push({ kind: "rvcModel", name: fileUploadSetting.rvcModel.filename })
|
||||
if (fileUploadSetting.rvcIndex?.filename) files.push({ kind: "rvcIndex", name: fileUploadSetting.rvcIndex.filename })
|
||||
|
||||
if (fileUploadSetting.ddspSvcModel?.filename) files.push({ kind: "ddspSvcModel", name: fileUploadSetting.ddspSvcModel.filename })
|
||||
if (fileUploadSetting.ddspSvcModelConfig?.filename) files.push({ kind: "ddspSvcModelConfig", name: fileUploadSetting.ddspSvcModelConfig.filename })
|
||||
if (fileUploadSetting.ddspSvcDiffusion?.filename) files.push({ kind: "ddspSvcDiffusion", name: fileUploadSetting.ddspSvcDiffusion.filename })
|
||||
if (fileUploadSetting.ddspSvcDiffusionConfig?.filename) files.push({ kind: "ddspSvcDiffusionConfig", name: fileUploadSetting.ddspSvcDiffusionConfig.filename })
|
||||
|
||||
|
||||
const params = JSON.stringify({
|
||||
defaultTune: fileUploadSetting.defaultTune || 0,
|
||||
defaultIndexRatio: fileUploadSetting.defaultIndexRatio || 1,
|
||||
defaultProtect: fileUploadSetting.defaultProtect || 0.5,
|
||||
sampleId: fileUploadSetting.isSampleMode ? fileUploadSetting.sampleId || "" : "",
|
||||
rvcIndexDownload: fileUploadSetting.rvcIndexDownload || false,
|
||||
files: fileUploadSetting.isSampleMode ? {} : {
|
||||
mmvcv13Config: fileUploadSetting.mmvcv13Config?.filename || "",
|
||||
mmvcv13Model: fileUploadSetting.mmvcv13Model?.filename || "",
|
||||
mmvcv15Config: fileUploadSetting.mmvcv15Config?.filename || "",
|
||||
mmvcv15Model: fileUploadSetting.mmvcv15Model?.filename || "",
|
||||
soVitsSvc40Config: fileUploadSetting.soVitsSvc40Config?.filename || "",
|
||||
soVitsSvc40Model: fileUploadSetting.soVitsSvc40Model?.filename || "",
|
||||
soVitsSvc40Cluster: fileUploadSetting.soVitsSvc40Cluster?.filename || "",
|
||||
soVitsSvc40v2Config: fileUploadSetting.soVitsSvc40v2Config?.filename || "",
|
||||
soVitsSvc40v2Model: fileUploadSetting.soVitsSvc40v2Model?.filename || "",
|
||||
soVitsSvc40v2Cluster: fileUploadSetting.soVitsSvc40v2Cluster?.filename || "",
|
||||
rvcModel: fileUploadSetting.rvcModel?.filename || "",
|
||||
rvcIndex: fileUploadSetting.rvcIndex?.filename || "",
|
||||
rvcFeature: fileUploadSetting.rvcFeature?.filename || "",
|
||||
|
||||
ddspSvcModel: fileUploadSetting.ddspSvcModel?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModel?.filename : "",
|
||||
ddspSvcModelConfig: fileUploadSetting.ddspSvcModelConfig?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModelConfig?.filename : "",
|
||||
ddspSvcDiffusion: fileUploadSetting.ddspSvcDiffusion?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusion?.filename : "",
|
||||
ddspSvcDiffusionConfig: fileUploadSetting.ddspSvcDiffusionConfig?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusionConfig.filename : "",
|
||||
}
|
||||
files: fileUploadSetting.isSampleMode ? [] : files
|
||||
})
|
||||
|
||||
if (fileUploadSetting.isHalf == undefined) {
|
||||
fileUploadSetting.isHalf = false
|
||||
}
|
||||
|
||||
console.log("PARAMS:", params)
|
||||
const voiceChangerType = VoiceChangerType.RVC
|
||||
|
||||
const loadPromise = props.voiceChangerClient.loadModel(
|
||||
slot,
|
||||
fileUploadSetting.isHalf,
|
||||
voiceChangerType,
|
||||
params,
|
||||
)
|
||||
|
||||
@ -460,12 +446,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
const storeToCache = (slot: number, fileUploadSetting: FileUploadSetting) => {
|
||||
try {
|
||||
const saveData: FileUploadSetting = {
|
||||
isHalf: fileUploadSetting.isHalf, // キャッシュとしては不使用。guiで上書きされる。
|
||||
uploaded: false, // キャッシュから読み込まれるときには、まだuploadされていないから。
|
||||
defaultTune: fileUploadSetting.defaultTune,
|
||||
defaultIndexRatio: fileUploadSetting.defaultIndexRatio,
|
||||
defaultProtect: fileUploadSetting.defaultProtect,
|
||||
framework: fileUploadSetting.framework,
|
||||
params: fileUploadSetting.params,
|
||||
|
||||
mmvcv13Config: fileUploadSetting.mmvcv13Config ? { data: fileUploadSetting.mmvcv13Config.data, filename: fileUploadSetting.mmvcv13Config.filename } : null,
|
||||
|
3  server/.vscode/settings.json (vendored)
@@ -8,9 +8,10 @@
        "editor.defaultFormatter": null, // do not use Prettier
        "editor.formatOnSave": true // format automatically on save
    },
    "python.formatting.blackArgs": ["--line-length", "550"],
    "flake8.args": [
        "--ignore=E501,E402,E722,E741,E203,W503"
        // "--max-line-length=150",
        // "--max-line-length=150"
        // "--max-complexity=20"
    ]
}
@@ -3,9 +3,7 @@ class NoModeLoadedException(Exception):
        self.framework = framework

    def __str__(self):
        return repr(
            f"No model for {self.framework} loaded. Please confirm the model uploaded."
        )
        return repr(f"No model for {self.framework} loaded. Please confirm the model uploaded.")


class HalfPrecisionChangingException(Exception):
@@ -36,3 +34,17 @@ class DeviceCannotSupportHalfPrecisionException(Exception):
class VoiceChangerIsNotSelectedException(Exception):
    def __str__(self):
        return repr("Voice Changer is not selected.")


class SlotConfigNotFoundException(Exception):
    def __init__(self, modelDir, slotIndex):
        self.modelDir = modelDir
        self.slotIndex = slotIndex

    def __str__(self):
        return repr(f"Config for slot {self.slotIndex} is not found. (modelDir:{self.modelDir})")


class WeightDownladException(Exception):
    def __str__(self):
        return repr("Failed to download weight.")
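A minimal sketch of how the new SlotConfigNotFoundException might be raised and handled, assuming a caller that looks up a slot's params.json; the helper function here is hypothetical, only the exception comes from this diff:

import os
from Exceptions import SlotConfigNotFoundException

def requireSlotConfig(model_dir: str, slotIndex: int) -> str:
    # hypothetical helper: fail loudly when a slot has no params.json
    jsonFile = os.path.join(model_dir, str(slotIndex), "params.json")
    if not os.path.exists(jsonFile):
        raise SlotConfigNotFoundException(model_dir, slotIndex)
    return jsonFile

try:
    requireSlotConfig("model_dir", 3)
except SlotConfigNotFoundException as e:
    print("[Voice Changer]", e)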
@ -1,4 +1,3 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import sys
|
||||
|
||||
from distutils.util import strtobool
|
||||
@ -7,24 +6,24 @@ import socket
|
||||
import platform
|
||||
import os
|
||||
import argparse
|
||||
from Downloader import download, download_no_tqdm
|
||||
from voice_changer.RVC.SampleDownloader import (
|
||||
checkRvcModelExist,
|
||||
downloadInitialSampleModels,
|
||||
)
|
||||
|
||||
from Exceptions import WeightDownladException
|
||||
from utils.downloader.SampleDownloader import downloadInitialSamples
|
||||
from utils.downloader.WeightDownloader import downloadWeight
|
||||
|
||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||
|
||||
import uvicorn
|
||||
from mods.ssl import create_self_signed_cert
|
||||
|
||||
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
||||
|
||||
from sio.MMVC_SocketIOApp import MMVC_SocketIOApp
|
||||
from restapi.MMVC_Rest import MMVC_Rest
|
||||
from const import (
|
||||
NATIVE_CLIENT_FILE_MAC,
|
||||
NATIVE_CLIENT_FILE_WIN,
|
||||
SSL_KEY_DIR,
|
||||
getRVCSampleJsonAndModelIds,
|
||||
)
|
||||
import subprocess
|
||||
import multiprocessing as mp
|
||||
@ -35,56 +34,23 @@ setup_loggers()
|
||||
|
||||
def setupArgParser():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--logLevel",
|
||||
type=str,
|
||||
default="critical",
|
||||
help="Log level info|critical. (default: critical)",
|
||||
)
|
||||
parser.add_argument("--logLevel", type=str, default="critical", help="Log level info|critical. (default: critical)")
|
||||
parser.add_argument("-p", type=int, default=18888, help="port")
|
||||
parser.add_argument("--https", type=strtobool, default=False, help="use https")
|
||||
parser.add_argument(
|
||||
"--httpsKey", type=str, default="ssl.key", help="path for the key of https"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--httpsCert", type=str, default="ssl.cert", help="path for the cert of https"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--httpsSelfSigned",
|
||||
type=strtobool,
|
||||
default=True,
|
||||
help="generate self-signed certificate",
|
||||
)
|
||||
parser.add_argument("--httpsKey", type=str, default="ssl.key", help="path for the key of https")
|
||||
parser.add_argument("--httpsCert", type=str, default="ssl.cert", help="path for the cert of https")
|
||||
parser.add_argument("--httpsSelfSigned", type=strtobool, default=True, help="generate self-signed certificate")
|
||||
|
||||
parser.add_argument("--model_dir", type=str, help="path to model files")
|
||||
parser.add_argument(
|
||||
"--rvc_sample_mode", type=str, default="production", help="rvc_sample_mode"
|
||||
)
|
||||
parser.add_argument("--sample_mode", type=str, default="production", help="sample_mode")
|
||||
|
||||
parser.add_argument(
|
||||
"--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--content_vec_500_onnx_on",
|
||||
type=strtobool,
|
||||
default=False,
|
||||
help="use or not onnx for content_vec_500",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hubert_base", type=str, help="path to hubert_base model(pytorch)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hubert_soft", type=str, help="path to hubert_soft model(pytorch)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)"
|
||||
)
|
||||
parser.add_argument("--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)")
|
||||
parser.add_argument("--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)")
|
||||
parser.add_argument("--content_vec_500_onnx_on", type=strtobool, default=False, help="use or not onnx for content_vec_500")
|
||||
parser.add_argument("--hubert_base", type=str, help="path to hubert_base model(pytorch)")
|
||||
parser.add_argument("--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)")
|
||||
parser.add_argument("--hubert_soft", type=str, help="path to hubert_soft model(pytorch)")
|
||||
parser.add_argument("--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)")
|
||||
|
||||
return parser
|
||||
|
||||
@ -111,76 +77,19 @@ def printMessage(message, level=0):
|
||||
print(f"\033[47m {message}\033[0m")
|
||||
|
||||
|
||||
def downloadWeight():
|
||||
# content_vec_500 = (args.content_vec_500,)
|
||||
# content_vec_500_onnx = (args.content_vec_500_onnx,)
|
||||
# content_vec_500_onnx_on = (args.content_vec_500_onnx_on,)
|
||||
hubert_base = args.hubert_base
|
||||
hubert_base_jp = args.hubert_base_jp
|
||||
hubert_soft = args.hubert_soft
|
||||
nsf_hifigan = args.nsf_hifigan
|
||||
|
||||
# file exists check (currently only for rvc)
|
||||
downloadParams = []
|
||||
if os.path.exists(hubert_base) is False:
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": "https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt",
|
||||
"saveTo": hubert_base,
|
||||
"position": 0,
|
||||
}
|
||||
)
|
||||
if os.path.exists(hubert_base_jp) is False:
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": "https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt",
|
||||
"saveTo": hubert_base_jp,
|
||||
"position": 1,
|
||||
}
|
||||
)
|
||||
if os.path.exists(hubert_soft) is False:
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt",
|
||||
"saveTo": hubert_soft,
|
||||
"position": 2,
|
||||
}
|
||||
)
|
||||
if os.path.exists(nsf_hifigan) is False:
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin",
|
||||
"saveTo": nsf_hifigan,
|
||||
"position": 3,
|
||||
}
|
||||
)
|
||||
nsf_hifigan_config = os.path.join(os.path.dirname(nsf_hifigan), "config.json")
|
||||
|
||||
if os.path.exists(nsf_hifigan_config) is False:
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": "https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json",
|
||||
"saveTo": nsf_hifigan_config,
|
||||
"position": 4,
|
||||
}
|
||||
)
|
||||
|
||||
with ThreadPoolExecutor() as pool:
|
||||
pool.map(download, downloadParams)
|
||||
|
||||
if (
|
||||
os.path.exists(hubert_base) is False
|
||||
or os.path.exists(hubert_base_jp) is False
|
||||
or os.path.exists(hubert_soft) is False
|
||||
or os.path.exists(nsf_hifigan) is False
|
||||
or os.path.exists(nsf_hifigan_config) is False
|
||||
):
|
||||
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
|
||||
printMessage("failed to download weight for rvc", level=2)
|
||||
|
||||
|
||||
parser = setupArgParser()
|
||||
args, unknown = parser.parse_known_args()
|
||||
voiceChangerParams = VoiceChangerParams(
|
||||
model_dir=args.model_dir,
|
||||
content_vec_500=args.content_vec_500,
|
||||
content_vec_500_onnx=args.content_vec_500_onnx,
|
||||
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
|
||||
hubert_base=args.hubert_base,
|
||||
hubert_base_jp=args.hubert_base_jp,
|
||||
hubert_soft=args.hubert_soft,
|
||||
nsf_hifigan=args.nsf_hifigan,
|
||||
sample_mode=args.sample_mode,
|
||||
)
|
||||
|
||||
printMessage(f"Booting PHASE :{__name__}", level=2)
|
||||
|
||||
@ -199,24 +108,6 @@ def localServer(logLevel: str = "critical"):
|
||||
|
||||
if __name__ == "MMVCServerSIO":
|
||||
mp.freeze_support()
|
||||
voiceChangerParams = VoiceChangerParams(
|
||||
model_dir=args.model_dir,
|
||||
content_vec_500=args.content_vec_500,
|
||||
content_vec_500_onnx=args.content_vec_500_onnx,
|
||||
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
|
||||
hubert_base=args.hubert_base,
|
||||
hubert_base_jp=args.hubert_base_jp,
|
||||
hubert_soft=args.hubert_soft,
|
||||
nsf_hifigan=args.nsf_hifigan,
|
||||
rvc_sample_mode=args.rvc_sample_mode,
|
||||
)
|
||||
|
||||
if (
|
||||
os.path.exists(voiceChangerParams.hubert_base) is False
|
||||
or os.path.exists(voiceChangerParams.hubert_base_jp) is False
|
||||
):
|
||||
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
|
||||
printMessage("failed to download weight for rvc", level=2)
|
||||
|
||||
voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
|
||||
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams)
|
||||
@ -230,20 +121,16 @@ if __name__ == "__main__":
|
||||
mp.freeze_support()
|
||||
|
||||
printMessage("Voice Changerを起動しています。", level=2)
|
||||
|
||||
# ダウンロード
|
||||
downloadWeight()
|
||||
os.makedirs(args.model_dir, exist_ok=True)
|
||||
|
||||
# ダウンロード(Weight)
|
||||
try:
|
||||
sampleJsons = []
|
||||
sampleJsonUrls, sampleModels = getRVCSampleJsonAndModelIds(args.rvc_sample_mode)
|
||||
for url in sampleJsonUrls:
|
||||
filename = os.path.basename(url)
|
||||
download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
|
||||
sampleJsons.append(filename)
|
||||
if checkRvcModelExist(args.model_dir) is False:
|
||||
downloadInitialSampleModels(sampleJsons, sampleModels, args.model_dir)
|
||||
downloadWeight(voiceChangerParams)
|
||||
except WeightDownladException:
|
||||
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
|
||||
printMessage("failed to download weight for rvc", level=2)
|
||||
|
||||
# ダウンロード(Sample)
|
||||
try:
|
||||
downloadInitialSamples(args.sample_mode, args.model_dir)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] loading sample failed", e)
|
||||
|
||||
@ -280,9 +167,7 @@ if __name__ == "__main__":
|
||||
)
|
||||
key_path = os.path.join(SSL_KEY_DIR, keyname)
|
||||
cert_path = os.path.join(SSL_KEY_DIR, certname)
|
||||
printMessage(
|
||||
f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1
|
||||
)
|
||||
printMessage(f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
|
||||
|
||||
elif args.https and args.httpsSelfSigned == 0:
|
||||
# HTTPS
|
||||
@ -336,16 +221,12 @@ if __name__ == "__main__":
|
||||
p.start()
|
||||
try:
|
||||
if sys.platform.startswith("win"):
|
||||
process = subprocess.Popen(
|
||||
[NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"]
|
||||
)
|
||||
process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"])
|
||||
return_code = process.wait()
|
||||
print("client closed.")
|
||||
p.terminate()
|
||||
elif sys.platform.startswith("darwin"):
|
||||
process = subprocess.Popen(
|
||||
[NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"]
|
||||
)
|
||||
process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"])
|
||||
return_code = process.wait()
|
||||
print("client closed.")
|
||||
p.terminate()
|
||||
|
@ -1,44 +0,0 @@
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
|
||||
from const import ModelType
|
||||
|
||||
|
||||
@dataclass
|
||||
class RVCModelSample:
|
||||
id: str = ""
|
||||
lang: str = ""
|
||||
tag: list[str] = field(default_factory=lambda: [])
|
||||
name: str = ""
|
||||
modelUrl: str = ""
|
||||
indexUrl: str = ""
|
||||
termsOfUseUrl: str = ""
|
||||
icon: str = ""
|
||||
credit: str = ""
|
||||
description: str = ""
|
||||
|
||||
sampleRate: int = 48000
|
||||
modelType: str = ""
|
||||
f0: bool = True
|
||||
|
||||
|
||||
def getModelSamples(jsonFiles: list[str], modelType: ModelType):
|
||||
try:
|
||||
samples: list[RVCModelSample] = []
|
||||
for file in jsonFiles:
|
||||
with open(file, "r", encoding="utf-8") as f:
|
||||
jsonDict = json.load(f)
|
||||
|
||||
modelList = jsonDict[modelType]
|
||||
if modelType == "RVC":
|
||||
for s in modelList:
|
||||
modelSample = RVCModelSample(**s)
|
||||
samples.append(modelSample)
|
||||
|
||||
else:
|
||||
raise RuntimeError(f"Unknown model type {modelType}")
|
||||
return samples
|
||||
|
||||
except Exception as e:
|
||||
print("[Voice Changer] loading sample info error:", e)
|
||||
return None
|
137  server/const.py
@@ -5,33 +5,21 @@ import tempfile
from typing import Literal, TypeAlias


ModelType: TypeAlias = Literal[
    "MMVCv15",
VoiceChangerType: TypeAlias = Literal[
    "MMVCv13",
    "so-vits-svc-40v2",
    "MMVCv15",
    "so-vits-svc-40",
    "so-vits-svc-40_c",
    "DDSP-SVC",
    "RVC",
]

ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION"

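With VoiceChangerType now a Literal alias rather than an Enum, values are plain strings and can be compared directly without .value. A minimal sketch of validating an incoming string against the alias members; the helper name is hypothetical:

from typing import get_args
from const import VoiceChangerType

def isSupportedVoiceChangerType(vcType: str) -> bool:
    # Literal members can be enumerated with typing.get_args
    return vcType in get_args(VoiceChangerType)

print(isSupportedVoiceChangerType("RVC"), isSupportedVoiceChangerType("unknown"))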
tmpdir = tempfile.TemporaryDirectory()
|
||||
# print("generate tmpdir:::",tmpdir)
|
||||
SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
|
||||
MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
|
||||
UPLOAD_DIR = (
|
||||
os.path.join(tmpdir.name, "upload_dir")
|
||||
if hasattr(sys, "_MEIPASS")
|
||||
else "upload_dir"
|
||||
)
|
||||
NATIVE_CLIENT_FILE_WIN = (
|
||||
os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") # type: ignore
|
||||
if hasattr(sys, "_MEIPASS")
|
||||
else "voice-changer-native-client"
|
||||
)
|
||||
UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
|
||||
NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client" # type: ignore
|
||||
NATIVE_CLIENT_FILE_MAC = (
|
||||
os.path.join(
|
||||
sys._MEIPASS, # type: ignore
|
||||
@ -44,25 +32,12 @@ NATIVE_CLIENT_FILE_MAC = (
|
||||
else "voice-changer-native-client"
|
||||
)
|
||||
|
||||
HUBERT_ONNX_MODEL_PATH = (
|
||||
os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") # type: ignore
|
||||
if hasattr(sys, "_MEIPASS")
|
||||
else "model_hubert/hubert_simple.onnx"
|
||||
)
|
||||
|
||||
|
||||
TMP_DIR = (
|
||||
os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
|
||||
)
|
||||
TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
|
||||
os.makedirs(TMP_DIR, exist_ok=True)
|
||||
|
||||
|
||||
def getFrontendPath():
|
||||
frontend_path = (
|
||||
os.path.join(sys._MEIPASS, "dist")
|
||||
if hasattr(sys, "_MEIPASS")
|
||||
else "../client/demo/dist"
|
||||
)
|
||||
frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
|
||||
return frontend_path
|
||||
|
||||
|
||||
@ -100,84 +75,84 @@ class ServerAudioDeviceTypes(Enum):
|
||||
audiooutput = "audiooutput"
|
||||
|
||||
|
||||
class RVCSampleMode(Enum):
|
||||
production = "production"
|
||||
testOfficial = "testOfficial"
|
||||
testDDPNTorch = "testDDPNTorch"
|
||||
testDDPNONNX = "testDDPNONNX"
|
||||
testONNXFull = "testONNXFull"
|
||||
RVCSampleMode: TypeAlias = Literal[
|
||||
"production",
|
||||
"testOfficial",
|
||||
"testDDPNTorch",
|
||||
"testDDPNONNX",
|
||||
"testONNXFull",
|
||||
]
|
||||
|
||||
|
||||
def getRVCSampleJsonAndModelIds(mode: RVCSampleMode):
|
||||
if mode == RVCSampleMode.production.value:
|
||||
def getSampleJsonAndModelIds(mode: RVCSampleMode):
|
||||
if mode == "production":
|
||||
return [
|
||||
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json",
|
||||
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
|
||||
], [
|
||||
("TokinaShigure_o", True),
|
||||
("KikotoMahiro_o", False),
|
||||
("Amitaro_o", False),
|
||||
("Tsukuyomi-chan_o", False),
|
||||
("TokinaShigure_o", {"useIndex": True}),
|
||||
("KikotoMahiro_o", {"useIndex": False}),
|
||||
("Amitaro_o", {"useIndex": False}),
|
||||
("Tsukuyomi-chan_o", {"useIndex": False}),
|
||||
]
|
||||
elif mode == RVCSampleMode.testOfficial.value:
|
||||
elif mode == "testOfficial":
|
||||
return [
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||
], [
|
||||
("test-official-v1-f0-48k-l9-hubert_t", True),
|
||||
("test-official-v1-nof0-48k-l9-hubert_t", False),
|
||||
("test-official-v2-f0-40k-l12-hubert_t", False),
|
||||
("test-official-v2-nof0-40k-l12-hubert_t", False),
|
||||
("test-official-v1-f0-48k-l9-hubert_o", True),
|
||||
("test-official-v1-nof0-48k-l9-hubert_o", False),
|
||||
("test-official-v2-f0-40k-l12-hubert_o", False),
|
||||
("test-official-v2-nof0-40k-l12-hubert_o", False),
|
||||
("test-official-v1-f0-48k-l9-hubert_t", {"useIndex": True}),
|
||||
("test-official-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
|
||||
("test-official-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
|
||||
("test-official-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
|
||||
("test-official-v1-f0-48k-l9-hubert_o", {"useIndex": True}),
|
||||
("test-official-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
|
||||
("test-official-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
|
||||
("test-official-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
|
||||
]
|
||||
elif mode == RVCSampleMode.testDDPNTorch.value:
|
||||
elif mode == "testDDPNTorch":
|
||||
return [
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||
], [
|
||||
("test-ddpn-v1-f0-48k-l9-hubert_t", False),
|
||||
("test-ddpn-v1-nof0-48k-l9-hubert_t", False),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_t", False),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_t", False),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_t", False),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", False),
|
||||
("test-ddpn-v1-f0-48k-l9-hubert_t", {"useIndex": False}),
|
||||
("test-ddpn-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_t", {"useIndex": False}),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", {"useIndex": False}),
|
||||
]
|
||||
elif mode == RVCSampleMode.testDDPNONNX.value:
|
||||
elif mode == "testDDPNONNX":
|
||||
return [
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||
], [
|
||||
("test-ddpn-v1-f0-48k-l9-hubert_o", False),
|
||||
("test-ddpn-v1-nof0-48k-l9-hubert_o", False),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_o", False),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_o", False),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_o", False),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", False),
|
||||
("test-ddpn-v1-f0-48k-l9-hubert_o", {"useIndex": False}),
|
||||
("test-ddpn-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_o", {"useIndex": False}),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", {"useIndex": False}),
|
||||
]
|
||||
elif mode == RVCSampleMode.testONNXFull.value:
|
||||
elif mode == "testONNXFull":
|
||||
return [
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||
], [
|
||||
("test-official-v1-f0-48k-l9-hubert_o_full", False),
|
||||
("test-official-v1-nof0-48k-l9-hubert_o_full", False),
|
||||
("test-official-v2-f0-40k-l12-hubert_o_full", False),
|
||||
("test-official-v2-nof0-40k-l12-hubert_o_full", False),
|
||||
("test-ddpn-v1-f0-48k-l9-hubert_o_full", False),
|
||||
("test-ddpn-v1-nof0-48k-l9-hubert_o_full", False),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_o_full", False),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", False),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", False),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", False),
|
||||
("test-official-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||
("test-official-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||
("test-official-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||
("test-official-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||
("test-ddpn-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||
("test-ddpn-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
|
||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
|
||||
]
|
||||
    else:
        return [], []


RVC_MODEL_DIRNAME = "rvc"
RVC_MAX_SLOT_NUM = 10
MAX_SLOT_NUM = 10
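The renamed getSampleJsonAndModelIds returns a pair: the sample-JSON URLs and a list of (sampleId, options) tuples, with the former bare boolean replaced by an options dict. A minimal sketch of consuming it, assuming the production mode shown above:

from const import getSampleJsonAndModelIds

sampleJsonUrls, sampleModels = getSampleJsonAndModelIds("production")
for url in sampleJsonUrls:
    print("sample json:", url)
for sampleId, options in sampleModels:
    # options replaces the old bare boolean, e.g. {"useIndex": True}
    print(sampleId, "useIndex:", options["useIndex"])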
37  server/data/ModelSample.py (Normal file)
@@ -0,0 +1,37 @@
from dataclasses import dataclass, field
from typing import TypeAlias, Union, Any
from const import VoiceChangerType


@dataclass
class ModelSample:
    id: str = ""
    voiceChangerType: VoiceChangerType | None = None


@dataclass
class RVCModelSample(ModelSample):
    voiceChangerType: VoiceChangerType = "RVC"
    lang: str = ""
    tag: list[str] = field(default_factory=lambda: [])
    name: str = ""
    modelUrl: str = ""
    indexUrl: str = ""
    termsOfUseUrl: str = ""
    icon: str = ""
    credit: str = ""
    description: str = ""

    sampleRate: int = 48000
    modelType: str = ""
    f0: bool = True


ModelSamples: TypeAlias = Union[ModelSample, RVCModelSample]


def generateModelSample(params: Any) -> ModelSamples:
    if params["voiceChangerType"] == "RVC":
        return RVCModelSample(**params)
    else:
        return ModelSample(**{k: v for k, v in params.items() if k in ModelSample.__annotations__})
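A minimal sketch of feeding one parsed sample entry through generateModelSample, assuming a dict shaped like the entries in the samples JSON and a working directory of server/ so that data.ModelSample imports; the concrete values are hypothetical:

from data.ModelSample import generateModelSample

entry = {
    "voiceChangerType": "RVC",
    "id": "Amitaro_o",                              # sample id as used in const.py
    "lang": "ja",
    "name": "Amitaro",
    "modelUrl": "https://example.com/model.onnx",   # hypothetical URL
    "sampleRate": 48000,
    "f0": True,
}
sample = generateModelSample(entry)
print(type(sample).__name__, sample.id, sample.sampleRate)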
59  server/data/ModelSlot.py (Normal file)
@@ -0,0 +1,59 @@
from typing import TypeAlias, Union
from const import EnumInferenceTypes, EnumEmbedderTypes, VoiceChangerType

from dataclasses import dataclass, asdict

import os
import json


@dataclass
class ModelSlot:
    voiceChangerType: VoiceChangerType | None = None


@dataclass
class RVCModelSlot(ModelSlot):
    voiceChangerType: VoiceChangerType = "RVC"
    modelFile: str = ""
    indexFile: str = ""
    defaultTune: int = 0
    defaultIndexRatio: int = 1
    defaultProtect: float = 0.5
    isONNX: bool = False
    modelType: str = EnumInferenceTypes.pyTorchRVC.value
    samplingRate: int = -1
    f0: bool = True
    embChannels: int = 256
    embOutputLayer: int = 9
    useFinalProj: bool = True
    deprecated: bool = False
    embedder: str = EnumEmbedderTypes.hubert.value

    name: str = ""
    description: str = ""
    credit: str = ""
    termsOfUseUrl: str = ""
    sampleId: str = ""
    iconFile: str = ""


ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]


def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
    slotDir = os.path.join(model_dir, str(slotIndex))
    jsonFile = os.path.join(slotDir, "params.json")
    if not os.path.exists(jsonFile):
        return ModelSlot()
    jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
    slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
    if slotInfo.voiceChangerType == "RVC":
        return RVCModelSlot(**jsonDict)
    else:
        return ModelSlot()


def saveSlotInfo(model_dir: str, slotIndex: int, slotInfo: ModelSlots):
    slotDir = os.path.join(model_dir, str(slotIndex))
    json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
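A minimal sketch of round-tripping a slot through saveSlotInfo and loadSlotInfo, assuming a model_dir matching the --model_dir argument and creating the per-slot directory first (saveSlotInfo does not create it); the slot values are hypothetical:

import os
from data.ModelSlot import RVCModelSlot, loadSlotInfo, saveSlotInfo

model_dir = "model_dir"  # assumption: same directory passed via --model_dir
os.makedirs(os.path.join(model_dir, "0"), exist_ok=True)

slotInfo = RVCModelSlot(modelFile="example.pth", samplingRate=40000, name="example")  # hypothetical values
saveSlotInfo(model_dir, 0, slotInfo)

loaded = loadSlotInfo(model_dir, 0)
print(loaded.voiceChangerType, getattr(loaded, "modelFile", None))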
@ -69,19 +69,14 @@ class MMVC_Rest:
|
||||
StaticFiles(directory=f"{getFrontendPath()}", html=True),
|
||||
name="static",
|
||||
)
|
||||
app_fastapi.mount(
|
||||
"/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static"
|
||||
)
|
||||
app_fastapi.mount(
|
||||
"/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static"
|
||||
)
|
||||
app_fastapi.mount("/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static")
|
||||
app_fastapi.mount("/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static")
|
||||
|
||||
if sys.platform.startswith("darwin"):
|
||||
p1 = os.path.dirname(sys._MEIPASS)
|
||||
p2 = os.path.dirname(p1)
|
||||
p3 = os.path.dirname(p2)
|
||||
model_dir = os.path.join(p3, voiceChangerParams.model_dir)
|
||||
print("mac model_dir:", model_dir)
|
||||
app_fastapi.mount(
|
||||
f"/{voiceChangerParams.model_dir}",
|
||||
StaticFiles(directory=model_dir),
|
||||
|
@ -1,6 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from typing import Union
|
||||
from fastapi import APIRouter
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
@ -10,8 +8,7 @@ from fastapi import UploadFile, File, Form
|
||||
from restapi.mods.FileUploader import upload_file, concat_file_chunks
|
||||
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
||||
|
||||
from const import MODEL_DIR, UPLOAD_DIR, ModelType
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from const import MODEL_DIR, UPLOAD_DIR, VoiceChangerType
|
||||
|
||||
|
||||
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
||||
@ -24,123 +21,130 @@ class MMVC_Rest_Fileuploader:
|
||||
self.router = APIRouter()
|
||||
self.router.add_api_route("/info", self.get_info, methods=["GET"])
|
||||
self.router.add_api_route("/performance", self.get_performance, methods=["GET"])
|
||||
self.router.add_api_route(
|
||||
"/upload_file", self.post_upload_file, methods=["POST"]
|
||||
)
|
||||
self.router.add_api_route(
|
||||
"/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"]
|
||||
)
|
||||
self.router.add_api_route(
|
||||
"/update_settings", self.post_update_settings, methods=["POST"]
|
||||
)
|
||||
self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
|
||||
self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
|
||||
self.router.add_api_route("/update_settings", self.post_update_settings, methods=["POST"])
|
||||
self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
|
||||
self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
|
||||
self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
|
||||
# self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
|
||||
self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
|
||||
self.router.add_api_route(
|
||||
"/merge_model", self.post_merge_models, methods=["POST"]
|
||||
)
|
||||
self.router.add_api_route(
|
||||
"/update_model_default", self.post_update_model_default, methods=["POST"]
|
||||
)
|
||||
self.router.add_api_route(
|
||||
"/update_model_info", self.post_update_model_info, methods=["POST"]
|
||||
)
|
||||
self.router.add_api_route(
|
||||
"/upload_model_assets", self.post_upload_model_assets, methods=["POST"]
|
||||
)
|
||||
self.router.add_api_route("/merge_model", self.post_merge_models, methods=["POST"])
|
||||
self.router.add_api_route("/update_model_default", self.post_update_model_default, methods=["POST"])
|
||||
self.router.add_api_route("/update_model_info", self.post_update_model_info, methods=["POST"])
|
||||
self.router.add_api_route("/upload_model_assets", self.post_upload_model_assets, methods=["POST"])
|
||||
|
||||
def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
|
||||
try:
|
||||
res = upload_file(UPLOAD_DIR, file, filename)
|
||||
json_compatible_item_data = jsonable_encoder(res)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_concat_uploaded_file(
|
||||
self, filename: str = Form(...), filenameChunkNum: int = Form(...)
|
||||
):
|
||||
def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
|
||||
try:
|
||||
res = concat_file_chunks(UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
|
||||
json_compatible_item_data = jsonable_encoder(res)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def get_info(self):
|
||||
try:
|
||||
info = self.voiceChangerManager.get_info()
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def get_performance(self):
|
||||
try:
|
||||
info = self.voiceChangerManager.get_performance()
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_update_settings(
|
||||
self, key: str = Form(...), val: Union[int, str, float] = Form(...)
|
||||
):
|
||||
def post_update_settings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
|
||||
try:
|
||||
print("[Voice Changer] update configuration:", key, val)
|
||||
info = self.voiceChangerManager.update_settings(key, val)
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_load_model(
|
||||
self,
|
||||
slot: int = Form(...),
|
||||
isHalf: bool = Form(...),
|
||||
voiceChangerType: str = Form(...),
|
||||
params: str = Form(...),
|
||||
):
|
||||
paramDict = json.loads(params)
|
||||
# print("paramDict", paramDict)
|
||||
|
||||
# Change Filepath
|
||||
newFilesDict = {}
|
||||
for key, val in paramDict["files"].items():
|
||||
if val != "-" and val != "":
|
||||
uploadPath = os.path.join(UPLOAD_DIR, val)
|
||||
storePath = os.path.join(UPLOAD_DIR, f"{slot}", val)
|
||||
storeDir = os.path.dirname(storePath)
|
||||
os.makedirs(storeDir, exist_ok=True)
|
||||
shutil.move(uploadPath, storePath)
|
||||
newFilesDict[key] = storePath
|
||||
paramDict["files"] = newFilesDict
|
||||
|
||||
props: LoadModelParams = LoadModelParams(
|
||||
slot=slot, isHalf=isHalf, params=paramDict
|
||||
)
|
||||
|
||||
info = self.voiceChangerManager.loadModel(props)
|
||||
try:
|
||||
info = self.voiceChangerManager.loadModel(slot, voiceChangerType, params)
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_model_type(self, modelType: ModelType = Form(...)):
|
||||
info = self.voiceChangerManager.switchModelType(modelType)
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
def post_model_type(self, modelType: VoiceChangerType = Form(...)):
|
||||
try:
|
||||
# info = self.voiceChangerManager.switchModelType(modelType)
|
||||
# json_compatible_item_data = jsonable_encoder(info)
|
||||
json_compatible_item_data = jsonable_encoder({"status": "ok"})
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def get_model_type(self):
|
||||
info = self.voiceChangerManager.getModelType()
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
# def get_model_type(self):
|
||||
# try:
|
||||
# # info = self.voiceChangerManager.getModelType()
|
||||
# # json_compatible_item_data = jsonable_encoder(info)
|
||||
# print(
|
||||
# "-------------- get_model_type",
|
||||
# )
|
||||
# json_compatible_item_data = jsonable_encoder({"status": "ok"})
|
||||
# return JSONResponse(content=json_compatible_item_data)
|
||||
# except Exception as e:
|
||||
# print("[Voice Changer] ", e)
|
||||
|
||||
def get_onnx(self):
|
||||
try:
|
||||
info = self.voiceChangerManager.export2onnx()
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_merge_models(self, request: str = Form(...)):
|
||||
try:
|
||||
print(request)
|
||||
info = self.voiceChangerManager.merge_models(request)
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_update_model_default(self):
|
||||
try:
|
||||
info = self.voiceChangerManager.update_model_default()
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_update_model_info(self, newData: str = Form(...)):
|
||||
try:
|
||||
info = self.voiceChangerManager.update_model_info(newData)
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
||||
def post_upload_model_assets(self, params: str = Form(...)):
|
||||
try:
|
||||
info = self.voiceChangerManager.upload_model_assets(params)
|
||||
json_compatible_item_data = jsonable_encoder(info)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ", e)
|
||||
|
@ -1,14 +0,0 @@
from fastapi.responses import FileResponse
import os


def mod_get_model(modelFile: str):
    modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
    return FileResponse(path=modelPath)


def mod_delete_model(modelFile: str):
    modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
    os.remove(modelPath)
    return {"Model deleted": f"{modelFile}"}
@ -1,23 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os


def mod_get_models():
    gModels = get_file_list(f'MMVC_Trainer/logs/G*.pth')
    dModels = get_file_list(f'MMVC_Trainer/logs/D*.pth')
    configs = get_file_list(f'MMVC_Trainer/logs/config.json')
    models = []
    models.extend(gModels)
    models.extend(dModels)
    models.extend(configs)
    models = [os.path.basename(x) for x in models]

    models = sorted(models)
    data = {
        "models": models
    }
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)
@ -1,26 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os

MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt"
def mod_get_multi_speaker_setting():
    data = {}
    if os.path.isfile(MULTI_SPEAKER_SETTING_PATH) == False:
        with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
            f.write("")
            f.flush()
            f.close()

    with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
        setting = f.read()
        data["multi_speaker_setting"] = setting
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)


def mod_post_multi_speaker_setting(setting:str):
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.write(setting)
        f.flush()
        f.close()
    return {"Write Multispeaker setting": f"{setting}"}
@ -1,15 +0,0 @@
import shutil
from restapi.mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH


def mod_delete_speaker(speaker:str):
    shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}")

    with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
        setting = f.readlines()

    filtered = filter(lambda x: x.startswith(f"{speaker}|")==False, setting)
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.writelines(list(filtered))
        f.flush()
        f.close()
    return {"Speaker deleted": f"{speaker}"}
@ -1,28 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os, base64


def mod_get_speaker_voice(speaker:str, voice:str):
    wav_file = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav'
    text_file = f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt'
    readable_text_file = f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt'

    data = {}
    if os.path.exists(wav_file):
        with open(wav_file, "rb") as f:
            wav_data = f.read()
        wav_data_base64 = base64.b64encode(wav_data).decode('utf-8')
        data["wav"] = wav_data_base64

    if os.path.exists(text_file):
        with open(text_file, "r") as f:
            text_data = f.read()
        data["text"] = text_data

    if os.path.exists(readable_text_file):
        with open(readable_text_file, "r") as f:
            text_data = f.read()
        data["readable_text"] = text_data
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)
@ -1,22 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os


def mod_get_speaker_voices(speaker:str):
    voices = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/wav/*.wav')

    texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/text/*.txt')

    readable_texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/*.txt')

    items = voices
    items.extend(texts)
    items.extend(readable_texts)
    items = [os.path.splitext(os.path.basename(x))[0] for x in items]
    items = sorted(set(items))
    data = {
        "voices": items
    }
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)
@ -1,15 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_dir_list
import os
# Create is implemented via the FileUploader.

def mod_get_speakers():
    os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True)
    speakers = get_dir_list("MMVC_Trainer/dataset/textful/")

    data = {
        "speakers": sorted(speakers)
    }
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)
@ -1,176 +0,0 @@
|
||||
import subprocess,os
|
||||
from restapi.utils.files import get_file_list
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
|
||||
LOG_DIR = "info"
|
||||
train_proc = None
|
||||
|
||||
SUCCESS = 0
|
||||
ERROR = -1
|
||||
### Submodule for Pre train
|
||||
def sync_exec(cmd:str, log_path:str, cwd=None):
|
||||
shortCmdStr = cmd[:20]
|
||||
try:
|
||||
with open(log_path, 'w') as log_file:
|
||||
if cwd == None:
|
||||
proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file)
|
||||
else:
|
||||
proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd=cwd)
|
||||
print(f"{shortCmdStr} returncode:{proc.returncode}")
|
||||
if proc.returncode != 0:
|
||||
print(f"{shortCmdStr} exception:")
|
||||
return (ERROR, f"returncode:{proc.returncode}")
|
||||
except Exception as e:
|
||||
print(f"{shortCmdStr} exception:", str(e))
|
||||
return (ERROR, str(e))
|
||||
return (SUCCESS, "success")
|
||||
|
||||
def sync_exec_with_stdout(cmd:str, log_path:str):
|
||||
shortCmdStr = cmd[:20]
|
||||
try:
|
||||
with open(log_path, 'w') as log_file:
|
||||
proc = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE,
|
||||
stderr=log_file, cwd="MMVC_Trainer")
|
||||
print(f"STDOUT{shortCmdStr}",proc.stdout)
|
||||
except Exception as e:
|
||||
print(f"{shortCmdStr} exception:", str(e))
|
||||
return (ERROR, str(e))
|
||||
return (SUCCESS, proc.stdout)
|
||||
|
||||
|
||||
def create_dataset():
|
||||
cmd = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt"
|
||||
log_file = os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt")
|
||||
res = sync_exec(cmd, log_file, "MMVC_Trainer")
|
||||
return res
|
||||
|
||||
def set_batch_size(batch:int):
|
||||
cmd = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json"
|
||||
log_file = os.path.join(LOG_DIR, "log_set_batch_size.txt")
|
||||
res = sync_exec(cmd, log_file)
|
||||
return res
|
||||
|
||||
def set_dummy_device_count():
|
||||
cmd = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py'
|
||||
log_file = os.path.join(LOG_DIR, "log_set_dummy_device_count.txt")
|
||||
res = sync_exec(cmd, log_file)
|
||||
return res
|
||||
|
||||
### Submodule for Train
|
||||
def exec_training(enable_finetuning:bool, GModel:str, DModel:str):
|
||||
global train_proc
|
||||
log_file = os.path.join(LOG_DIR, "training.txt")
|
||||
|
||||
# Check whether training is already running (avoid starting it twice)
|
||||
if train_proc != None:
|
||||
status = train_proc.poll()
|
||||
if status != None:
|
||||
print("Training have ended.", status)
|
||||
train_proc = None
|
||||
else:
|
||||
print("Training have stated.")
|
||||
return (ERROR, "Training have started")
|
||||
|
||||
try:
|
||||
with open(log_file, 'w') as log_file:
|
||||
if enable_finetuning == True:
|
||||
GModelPath = os.path.join("logs", GModel) # cwd is specified at runtime, so the "logs" folder is sufficient.
|
||||
DModelPath = os.path.join("logs", DModel)
|
||||
cmd = f'python3 train_ms.py -c configs/train_config.json -m ./ -fg {GModelPath} -fd {DModelPath}'
|
||||
else:
|
||||
cmd = 'python3 train_ms.py -c configs/train_config.json -m ./'
|
||||
print("exec:",cmd)
|
||||
train_proc = subprocess.Popen("exec "+cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer")
|
||||
print("Training stated")
|
||||
print(f"returncode:{train_proc.returncode}")
|
||||
except Exception as e:
|
||||
print("start training exception:", str(e))
|
||||
return (ERROR, str(e))
|
||||
|
||||
return (SUCCESS, "success")
|
||||
|
||||
def stop_training():
|
||||
global train_proc
|
||||
if train_proc == None:
|
||||
print("Training have not stated.")
|
||||
return (ERROR, "Training have not stated.")
|
||||
|
||||
status = train_proc.poll()
|
||||
if status != None:
|
||||
print("Training have already ended.", status)
|
||||
train_proc = None
|
||||
return (ERROR, "Training have already ended. " + status)
|
||||
else:
|
||||
train_proc.kill()
|
||||
print("Training have stoped.")
|
||||
return (SUCCESS, "success")
|
||||
|
||||
### Main
|
||||
def mod_post_pre_training(batch:int):
|
||||
res = set_batch_size(batch)
|
||||
if res[0] == ERROR:
|
||||
return {"result":"failed", "detail": f"Preprocess(set_batch_size) failed. {res[1]}"}
|
||||
|
||||
res = set_dummy_device_count()
|
||||
if res[0] == ERROR:
|
||||
return {"result":"failed", "detail": f"Preprocess(set_dummy_device_count) failed. {res[1]}"}
|
||||
|
||||
res = create_dataset()
|
||||
if res[0] == ERROR:
|
||||
return {"result":"failed", "detail": f"Preprocess failed(create_dataset). {res[1]}"}
|
||||
|
||||
return {"result":"success", "detail": f"Preprocess succeeded. {res[1]}"}
|
||||
|
||||
|
||||
def mod_post_start_training(enable_finetuning:str, GModel:str, DModel:str):
|
||||
print("START_TRAINING:::::::", enable_finetuning, GModel, DModel)
|
||||
res = exec_training(enable_finetuning, GModel, DModel)
|
||||
if res[0] == ERROR:
|
||||
return {"result":"failed", "detail": f"Start training failed. {res[1]}"}
|
||||
|
||||
return {"result":"success", "detail": f"Start training succeeded. {res[1]}"}
|
||||
|
||||
def mod_post_stop_training():
|
||||
res = stop_training()
|
||||
if res[0] == ERROR:
|
||||
return {"result":"failed", "detail": f"Stop training failed. {res[1]}"}
|
||||
|
||||
return {"result":"success", "detail": f"Stop training succeeded. {res[1]}"}
|
||||
|
||||
### DEBUG
|
||||
def mod_get_related_files():
|
||||
files = get_file_list(os.path.join(LOG_DIR,"*"))
|
||||
files.extend([
|
||||
"MMVC_Trainer/dataset/multi_speaker_correspondence.txt",
|
||||
"MMVC_Trainer/train_ms.py",
|
||||
])
|
||||
files.extend(
|
||||
get_file_list("MMVC_Trainer/configs/*")
|
||||
)
|
||||
|
||||
res = []
|
||||
for f in files:
|
||||
size = os.path.getsize(f)
|
||||
data = ""
|
||||
if size < 1024*1024:
|
||||
with open(f, "r") as input:
|
||||
data = input.read()
|
||||
|
||||
res.append({
|
||||
"name":f,
|
||||
"size":size,
|
||||
"data":data
|
||||
})
|
||||
|
||||
json_compatible_item_data = jsonable_encoder(res)
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
|
||||
def mod_get_tail_training_log(num:int):
|
||||
training_log_file = os.path.join(LOG_DIR, "training.txt")
|
||||
res = sync_exec(f"cat {training_log_file} | sed -e 's/.*\r//' > /tmp/out","/dev/null")
|
||||
cmd = f'tail -n {num} /tmp/out'
|
||||
res = sync_exec_with_stdout(cmd, "/dev/null")
|
||||
if res[0] == ERROR:
|
||||
return {"result":"failed", "detail": f"Tail training log failed. {res[1]}"}
|
||||
return {"result":"success", "detail":res[1]}
|
@ -1,26 +0,0 @@
import os
import glob


# def get_file_list(top_dir):
#     for root, dirs, files in os.walk(top_dir):
#         for dir in dirs:
#             dirPath = os.path.join(root, dir)
#             print(f'dirPath = {dirPath}')

#         for file in files:
#             filePath = os.path.join(root, file)
#             print(f'filePath = {filePath}')


def get_dir_list(top_dir):
    dirlist = []
    files = os.listdir(top_dir)
    for filename in files:
        if os.path.isdir(os.path.join(top_dir, filename)):
            dirlist.append(filename)
    return dirlist


def get_file_list(top_dir):
    return glob.glob(top_dir)
|
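A quick usage note: get_file_list() is a thin wrapper over glob, so callers pass a full glob pattern, while get_dir_list() takes a directory path and returns only its immediate subdirectory names.

from restapi.utils.files import get_dir_list, get_file_list

speakers = get_dir_list("MMVC_Trainer/dataset/textful/")  # subdirectory names only
gModels = get_file_list("MMVC_Trainer/logs/G*.pth")       # argument is a glob pattern, not a directory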
165
server/utils/downloader/SampleDownloader.py
Normal file
@ -0,0 +1,165 @@
|
||||
import json
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any, Tuple
|
||||
|
||||
from const import RVCSampleMode, getSampleJsonAndModelIds
|
||||
from data.ModelSample import ModelSamples, generateModelSample
|
||||
from data.ModelSlot import RVCModelSlot, loadSlotInfo, saveSlotInfo
|
||||
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
||||
from utils.downloader.Downloader import download, download_no_tqdm
|
||||
|
||||
|
||||
def downloadInitialSamples(mode: RVCSampleMode, model_dir: str):
|
||||
sampleJsonUrls, sampleModels = getSampleJsonAndModelIds(mode)
|
||||
sampleJsons = _downloadSampleJsons(sampleJsonUrls)
|
||||
if os.path.exists(model_dir):
|
||||
print("[Voice Changer] model_dir is already exists. skil download samples.")
|
||||
return
|
||||
samples = _generateSampleList(sampleJsons)
|
||||
slotIndex = list(range(len(sampleModels)))
|
||||
_downloadSamples(samples, sampleModels, model_dir, slotIndex)
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def downloadSample(mode: RVCSampleMode, modelId: str, model_dir: str, slotIndex: int, params: Any):
|
||||
sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
|
||||
sampleJsons = _generateSampleJsons(sampleJsonUrls)
|
||||
samples = _generateSampleList(sampleJsons)
|
||||
_downloadSamples(samples, [(modelId, params)], model_dir, [slotIndex])
|
||||
pass
|
||||
|
||||
|
||||
def getSampleInfos(mode: RVCSampleMode):
|
||||
sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
|
||||
sampleJsons = _generateSampleJsons(sampleJsonUrls)
|
||||
samples = _generateSampleList(sampleJsons)
|
||||
return samples
|
||||
|
||||
|
||||
def _downloadSampleJsons(sampleJsonUrls: list[str]):
|
||||
sampleJsons = []
|
||||
for url in sampleJsonUrls:
|
||||
filename = os.path.basename(url)
|
||||
download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
|
||||
sampleJsons.append(filename)
|
||||
return sampleJsons
|
||||
|
||||
|
||||
def _generateSampleJsons(sampleJsonUrls: list[str]):
|
||||
sampleJsons = []
|
||||
for url in sampleJsonUrls:
|
||||
filename = os.path.basename(url)
|
||||
sampleJsons.append(filename)
|
||||
return sampleJsons
|
||||
|
||||
|
||||
def _generateSampleList(sampleJsons: list[str]):
|
||||
samples: list[ModelSamples] = []
|
||||
for file in sampleJsons:
|
||||
with open(file, "r", encoding="utf-8") as f:
|
||||
jsonDict = json.load(f)
|
||||
for vcType in jsonDict:
|
||||
for sampleParams in jsonDict[vcType]:
|
||||
sample = generateModelSample(sampleParams)
|
||||
samples.append(sample)
|
||||
return samples
|
||||
|
||||
|
||||
def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str, Any]], model_dir: str, slotIndex: list[int]):
|
||||
downloadParams = []
|
||||
line_num = 0
|
||||
|
||||
for i, initSampleId in enumerate(sampleModelIds):
|
||||
targetSampleId = initSampleId[0]
|
||||
targetSampleParams = initSampleId[1]
|
||||
targetSlotIndex = slotIndex[i]
|
||||
|
||||
# Search for the initial sample
|
||||
match = False
|
||||
for sample in samples:
|
||||
print("sample", sample)
|
||||
if sample.id == targetSampleId:
|
||||
match = True
|
||||
break
|
||||
if match is False:
|
||||
print(f"[Voice Changer] initiail sample not found. {targetSampleId}")
|
||||
continue
|
||||
|
||||
# When the sample is found...
|
||||
slotDir = os.path.join(model_dir, str(targetSlotIndex))
|
||||
if sample.voiceChangerType == "RVC":
|
||||
slotInfo: RVCModelSlot = RVCModelSlot()
|
||||
|
||||
os.makedirs(slotDir, exist_ok=True)
|
||||
modelFilePath = os.path.join(
|
||||
slotDir,
|
||||
os.path.basename(sample.modelUrl),
|
||||
)
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sample.modelUrl,
|
||||
"saveTo": modelFilePath,
|
||||
"position": line_num,
|
||||
}
|
||||
)
|
||||
slotInfo.modelFile = modelFilePath
|
||||
line_num += 1
|
||||
|
||||
if targetSampleParams["useIndex"] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
|
||||
indexPath = os.path.join(
|
||||
slotDir,
|
||||
os.path.basename(sample.indexUrl),
|
||||
)
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sample.indexUrl,
|
||||
"saveTo": indexPath,
|
||||
"position": line_num,
|
||||
}
|
||||
)
|
||||
slotInfo.indexFile = indexPath
|
||||
line_num += 1
|
||||
|
||||
if hasattr(sample, "icon") and sample.icon != "":
|
||||
iconPath = os.path.join(
|
||||
slotDir,
|
||||
os.path.basename(sample.icon),
|
||||
)
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sample.icon,
|
||||
"saveTo": iconPath,
|
||||
"position": line_num,
|
||||
}
|
||||
)
|
||||
slotInfo.iconFile = iconPath
|
||||
line_num += 1
|
||||
|
||||
slotInfo.sampleId = sample.id
|
||||
slotInfo.credit = sample.credit
|
||||
slotInfo.description = sample.description
|
||||
slotInfo.name = sample.name
|
||||
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
|
||||
slotInfo.defaultTune = 0
|
||||
slotInfo.defaultIndexRatio = 1
|
||||
slotInfo.defaultProtect = 0.5
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
saveSlotInfo(model_dir, targetSlotIndex, slotInfo)
|
||||
|
||||
# Download
|
||||
print("[Voice Changer] Downloading model files...")
|
||||
with ThreadPoolExecutor() as pool:
|
||||
pool.map(download, downloadParams)
|
||||
|
||||
# Generate metadata
|
||||
print("[Voice Changer] Generating metadata...")
|
||||
for targetSlotIndex in slotIndex:
|
||||
slotInfo = loadSlotInfo(model_dir, targetSlotIndex)
|
||||
if slotInfo.voiceChangerType == "RVC":
|
||||
if slotInfo.isONNX:
|
||||
_setInfoByONNX(slotInfo)
|
||||
else:
|
||||
_setInfoByPytorch(slotInfo)
|
||||
saveSlotInfo(model_dir, targetSlotIndex, slotInfo)
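In short, downloadInitialSamples() only does work when model_dir does not exist yet: it fetches the sample catalog JSONs, writes per-slot metadata, and downloads model/index/icon files in parallel. A minimal startup sketch; the mode value is a placeholder for whichever RVCSampleMode the server is configured with.

# Sketch only; "production" stands in for a real RVCSampleMode value from const.
from utils.downloader.SampleDownloader import downloadInitialSamples

downloadInitialSamples("production", "model_dir")  # no-op if "model_dir" already exists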
|
64
server/utils/downloader/WeightDownloader.py
Normal file
@ -0,0 +1,64 @@
import os
from concurrent.futures import ThreadPoolExecutor

from utils.downloader.Downloader import download
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from Exceptions import WeightDownladException


def downloadWeight(voiceChangerParams: VoiceChangerParams):
    hubert_base = voiceChangerParams.hubert_base
    hubert_base_jp = voiceChangerParams.hubert_base_jp
    hubert_soft = voiceChangerParams.hubert_soft
    nsf_hifigan = voiceChangerParams.nsf_hifigan

    # file exists check (currently only for rvc)
    downloadParams = []
    if os.path.exists(hubert_base) is False:
        downloadParams.append(
            {
                "url": "https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt",
                "saveTo": hubert_base,
                "position": 0,
            }
        )
    if os.path.exists(hubert_base_jp) is False:
        downloadParams.append(
            {
                "url": "https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt",
                "saveTo": hubert_base_jp,
                "position": 1,
            }
        )
    if os.path.exists(hubert_soft) is False:
        downloadParams.append(
            {
                "url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt",
                "saveTo": hubert_soft,
                "position": 2,
            }
        )
    if os.path.exists(nsf_hifigan) is False:
        downloadParams.append(
            {
                "url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin",
                "saveTo": nsf_hifigan,
                "position": 3,
            }
        )
    nsf_hifigan_config = os.path.join(os.path.dirname(nsf_hifigan), "config.json")

    if os.path.exists(nsf_hifigan_config) is False:
        downloadParams.append(
            {
                "url": "https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json",
                "saveTo": nsf_hifigan_config,
                "position": 4,
            }
        )

    with ThreadPoolExecutor() as pool:
        pool.map(download, downloadParams)

    if os.path.exists(hubert_base) is False or os.path.exists(hubert_base_jp) is False or os.path.exists(hubert_soft) is False or os.path.exists(nsf_hifigan) is False or os.path.exists(nsf_hifigan_config) is False:
        raise WeightDownladException()
|
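downloadWeight() fills in whichever of the four pretrained weights (plus the NSF-HiFiGAN config) are missing and re-checks afterwards, raising WeightDownladException if anything is still absent. A rough sketch; the stand-in object and its paths below are illustrative, since the real VoiceChangerParams carries more fields than the four used here.

# Hypothetical stand-in for VoiceChangerParams exposing only the attributes downloadWeight() reads.
from dataclasses import dataclass
from utils.downloader.WeightDownloader import downloadWeight

@dataclass
class _WeightPaths:
    hubert_base: str = "pretrain/hubert_base.pt"
    hubert_base_jp: str = "pretrain/rinna_hubert_base_jp.pt"
    hubert_soft: str = "pretrain/hubert-soft-0d54a1f4.pt"
    nsf_hifigan: str = "pretrain/nsf_hifigan/model"

downloadWeight(_WeightPaths())  # downloads missing files, raises WeightDownladException on failure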
134
server/voice_changer/Local/ServerDevice.py
Normal file
@ -0,0 +1,134 @@
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import librosa
|
||||
import sounddevice as sd
|
||||
|
||||
from voice_changer.Local.AudioDeviceList import ServerAudioDevice
|
||||
from voice_changer.VoiceChanger import VoiceChanger
|
||||
from voice_changer.utils.Timer import Timer
|
||||
|
||||
|
||||
class ServerDevice:
|
||||
def __init__(self):
|
||||
self.voiceChanger: VoiceChanger | None = None
|
||||
pass
|
||||
|
||||
def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
|
||||
if self.voiceChanger is None:
|
||||
print("[Voice Changer] voiceChanger is None")
|
||||
return
|
||||
|
||||
try:
|
||||
indata = indata * self.voiceChanger.settings.serverInputAudioGain
|
||||
with Timer("all_inference_time") as t:
|
||||
unpackedData = librosa.to_mono(indata.T) * 32768.0
|
||||
out_wav, times = self.voiceChanger.on_request(unpackedData)
|
||||
outputChannels = outdata.shape[1]
|
||||
outdata[:] = np.repeat(out_wav, outputChannels).reshape(-1, outputChannels) / 32768.0
|
||||
outdata[:] = outdata * self.voiceChanger.settings.serverOutputAudioGain
|
||||
all_inference_time = t.secs
|
||||
performance = [all_inference_time] + times
|
||||
if self.voiceChanger.emitTo is not None:
|
||||
self.voiceChanger.emitTo(performance)
|
||||
self.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ex:", e)
|
||||
|
||||
def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
|
||||
serverAudioDevice = [x for x in audioDeviceList if x.index == index]
|
||||
if len(serverAudioDevice) > 0:
|
||||
return serverAudioDevice[0]
|
||||
else:
|
||||
return None
|
||||
|
||||
def serverLocal(self, _vc: VoiceChanger):
|
||||
self.voiceChanger = _vc
|
||||
vc = self.voiceChanger
|
||||
|
||||
currentInputDeviceId = -1
|
||||
currentModelSamplingRate = -1
|
||||
currentOutputDeviceId = -1
|
||||
currentInputChunkNum = -1
|
||||
while True:
|
||||
if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc is None:
|
||||
vc.settings.inputSampleRate = 48000
|
||||
time.sleep(2)
|
||||
else:
|
||||
sd._terminate()
|
||||
sd._initialize()
|
||||
|
||||
sd.default.device[0] = vc.settings.serverInputDeviceId
|
||||
currentInputDeviceId = vc.settings.serverInputDeviceId
|
||||
sd.default.device[1] = vc.settings.serverOutputDeviceId
|
||||
currentOutputDeviceId = vc.settings.serverOutputDeviceId
|
||||
|
||||
currentInputChannelNum = vc.settings.serverAudioInputDevices
|
||||
|
||||
serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
|
||||
serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
|
||||
print(serverInputAudioDevice, serverOutputAudioDevice)
|
||||
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
|
||||
time.sleep(2)
|
||||
print("serverInputAudioDevice or serverOutputAudioDevice is None")
|
||||
continue
|
||||
|
||||
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
|
||||
currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
|
||||
|
||||
currentInputChunkNum = vc.settings.serverReadChunkSize
|
||||
block_frame = currentInputChunkNum * 128
|
||||
|
||||
# sample rate precheck(alsa cannot use 40000?)
|
||||
try:
|
||||
currentModelSamplingRate = self.voiceChanger.voiceChangerModel.get_processing_sampling_rate()
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ex: get_processing_sampling_rate", e)
|
||||
continue
|
||||
try:
|
||||
with sd.Stream(
|
||||
callback=self.audio_callback,
|
||||
blocksize=block_frame,
|
||||
samplerate=currentModelSamplingRate,
|
||||
dtype="float32",
|
||||
channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||
):
|
||||
pass
|
||||
vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
|
||||
vc.settings.inputSampleRate = currentModelSamplingRate
|
||||
print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
|
||||
except Exception as e:
|
||||
print(
|
||||
"[Voice Changer] ex: fallback to device default samplerate",
|
||||
e,
|
||||
)
|
||||
vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
|
||||
vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
|
||||
|
||||
# main loop
|
||||
try:
|
||||
with sd.Stream(
|
||||
callback=self.audio_callback,
|
||||
blocksize=block_frame,
|
||||
samplerate=vc.settings.serverInputAudioSampleRate,
|
||||
dtype="float32",
|
||||
channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||
):
|
||||
while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.voiceChangerModel.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
|
||||
time.sleep(2)
|
||||
print(
|
||||
"[Voice Changer] server audio",
|
||||
vc.settings.performance,
|
||||
)
|
||||
print(
|
||||
"[Voice Changer] info:",
|
||||
vc.settings.serverAudioStated,
|
||||
currentInputDeviceId,
|
||||
currentOutputDeviceId,
|
||||
vc.settings.serverInputAudioSampleRate,
|
||||
currentInputChunkNum,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ex:", e)
|
||||
time.sleep(2)
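serverLocal() is a blocking loop that keeps reopening the sounddevice stream whenever the relevant settings change, so it is intended to run on its own thread next to the web server. A minimal sketch, assuming voiceChanger is an already constructed VoiceChanger instance.

# Sketch; `voiceChanger` is assumed to be an initialized VoiceChanger.
import threading
from voice_changer.Local.ServerDevice import ServerDevice

serverDevice = ServerDevice()
t = threading.Thread(target=serverDevice.serverLocal, args=(voiceChanger,), daemon=True)
t.start()  # audio I/O begins once settings.serverAudioStated is set to 1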
|
@ -3,26 +3,26 @@ from const import EnumInferenceTypes, EnumEmbedderTypes
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelSlot:
|
||||
modelFile: str = ""
|
||||
indexFile: str = ""
|
||||
defaultTune: int = 0
|
||||
defaultIndexRatio: int = 1
|
||||
defaultProtect: float = 0.5
|
||||
isONNX: bool = False
|
||||
modelType: str = EnumInferenceTypes.pyTorchRVC.value
|
||||
samplingRate: int = -1
|
||||
f0: bool = True
|
||||
embChannels: int = 256
|
||||
embOutputLayer: int = 9
|
||||
useFinalProj: bool = True
|
||||
deprecated: bool = False
|
||||
embedder: str = EnumEmbedderTypes.hubert.value
|
||||
# @dataclass
|
||||
# class ModelSlot:
|
||||
# modelFile: str = ""
|
||||
# indexFile: str = ""
|
||||
# defaultTune: int = 0
|
||||
# defaultIndexRatio: int = 1
|
||||
# defaultProtect: float = 0.5
|
||||
# isONNX: bool = False
|
||||
# modelType: str = EnumInferenceTypes.pyTorchRVC.value
|
||||
# samplingRate: int = -1
|
||||
# f0: bool = True
|
||||
# embChannels: int = 256
|
||||
# embOutputLayer: int = 9
|
||||
# useFinalProj: bool = True
|
||||
# deprecated: bool = False
|
||||
# embedder: str = EnumEmbedderTypes.hubert.value
|
||||
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
credit: str = ""
|
||||
termsOfUseUrl: str = ""
|
||||
sampleId: str = ""
|
||||
iconFile: str = ""
|
||||
# name: str = ""
|
||||
# description: str = ""
|
||||
# credit: str = ""
|
||||
# termsOfUseUrl: str = ""
|
||||
# sampleId: str = ""
|
||||
# iconFile: str = ""
|
||||
|
@ -1,9 +1,45 @@
|
||||
from const import EnumEmbedderTypes, EnumInferenceTypes
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from const import UPLOAD_DIR, EnumEmbedderTypes, EnumInferenceTypes
|
||||
|
||||
import torch
|
||||
import onnxruntime
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from data.ModelSlot import ModelSlot, RVCModelSlot, saveSlotInfo
|
||||
|
||||
|
||||
def setSlotAsRVC(model_dir: str, slot: int, paramDict):
|
||||
slotInfo: RVCModelSlot = RVCModelSlot()
|
||||
slotDir = os.path.join(model_dir, str(slot))
|
||||
os.makedirs(slotDir, exist_ok=True)
|
||||
|
||||
print("RVC SLot Load", slot, paramDict)
|
||||
for f in paramDict["files"]:
|
||||
srcPath = os.path.join(UPLOAD_DIR, f["name"])
|
||||
dstPath = os.path.join(slotDir, f["name"])
|
||||
if f["kind"] == "rvcModel":
|
||||
slotInfo.modelFile = dstPath
|
||||
slotInfo.name = os.path.splitext(f["name"])[0]
|
||||
elif f["kind"] == "rvcIndex":
|
||||
slotInfo.indexFile = dstPath
|
||||
else:
|
||||
print(f"[Voice Changer] unknown file kind {f['kind']}")
|
||||
|
||||
shutil.move(srcPath, dstPath)
|
||||
|
||||
slotInfo.defaultTune = 0
|
||||
slotInfo.defaultIndexRatio = 1
|
||||
slotInfo.defaultProtect = 0.5
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
|
||||
if slotInfo.isONNX:
|
||||
_setInfoByONNX(slotInfo)
|
||||
else:
|
||||
_setInfoByPytorch(slotInfo)
|
||||
|
||||
saveSlotInfo(model_dir, slot, slotInfo)
|
||||
|
||||
print("[Voice Changer] new model added:", slotInfo)
|
||||
|
||||
|
||||
def _setInfoByPytorch(slot: ModelSlot):
|
||||
@ -15,22 +51,14 @@ def _setInfoByPytorch(slot: ModelSlot):
|
||||
slot.f0 = True if cpt["f0"] == 1 else False
|
||||
version = cpt.get("version", "v1")
|
||||
if version is None or version == "v1":
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.pyTorchRVC.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.pyTorchRVCNono.value
|
||||
)
|
||||
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
|
||||
slot.embChannels = 256
|
||||
slot.embOutputLayer = 9
|
||||
slot.useFinalProj = True
|
||||
slot.embedder = EnumEmbedderTypes.hubert.value
|
||||
print("[Voice Changer] Official Model(pyTorch) : v1")
|
||||
else:
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.pyTorchRVCv2.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.pyTorchRVCv2Nono.value
|
||||
)
|
||||
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
|
||||
slot.embChannels = 768
|
||||
slot.embOutputLayer = 12
|
||||
slot.useFinalProj = False
|
||||
@ -40,37 +68,21 @@ def _setInfoByPytorch(slot: ModelSlot):
|
||||
else:
|
||||
# DDPN RVC
|
||||
slot.f0 = True if cpt["f0"] == 1 else False
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.pyTorchWebUI.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.pyTorchWebUINono.value
|
||||
)
|
||||
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
|
||||
slot.embChannels = cpt["config"][17]
|
||||
slot.embOutputLayer = (
|
||||
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
|
||||
)
|
||||
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
|
||||
if slot.embChannels == 256:
|
||||
slot.useFinalProj = True
|
||||
else:
|
||||
slot.useFinalProj = False
|
||||
|
||||
# DDPNモデルの情報を表示
|
||||
if (
|
||||
slot.embChannels == 256
|
||||
and slot.embOutputLayer == 9
|
||||
and slot.useFinalProj is True
|
||||
):
|
||||
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
|
||||
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
|
||||
elif (
|
||||
slot.embChannels == 768
|
||||
and slot.embOutputLayer == 12
|
||||
and slot.useFinalProj is False
|
||||
):
|
||||
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
|
||||
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
|
||||
else:
|
||||
print(
|
||||
f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
|
||||
)
|
||||
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
|
||||
|
||||
slot.embedder = cpt["embedder_name"]
|
||||
if slot.embedder.endswith("768"):
|
||||
@ -91,9 +103,8 @@ def _setInfoByPytorch(slot: ModelSlot):
|
||||
|
||||
|
||||
def _setInfoByONNX(slot: ModelSlot):
|
||||
tmp_onnx_session = onnxruntime.InferenceSession(
|
||||
slot.modelFile, providers=["CPUExecutionProvider"]
|
||||
)
|
||||
print("......................................_setInfoByONNX")
|
||||
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
|
||||
modelmeta = tmp_onnx_session.get_modelmeta()
|
||||
try:
|
||||
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
|
||||
@ -101,16 +112,8 @@ def _setInfoByONNX(slot: ModelSlot):
|
||||
# slot.modelType = metadata["modelType"]
|
||||
slot.embChannels = metadata["embChannels"]
|
||||
|
||||
slot.embOutputLayer = (
|
||||
metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
|
||||
)
|
||||
slot.useFinalProj = (
|
||||
metadata["useFinalProj"]
|
||||
if "useFinalProj" in metadata
|
||||
else True
|
||||
if slot.embChannels == 256
|
||||
else False
|
||||
)
|
||||
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
|
||||
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
|
||||
|
||||
if slot.embChannels == 256:
|
||||
slot.useFinalProj = True
|
||||
@ -118,22 +121,12 @@ def _setInfoByONNX(slot: ModelSlot):
|
||||
slot.useFinalProj = False
|
||||
|
||||
# ONNXモデルの情報を表示
|
||||
if (
|
||||
slot.embChannels == 256
|
||||
and slot.embOutputLayer == 9
|
||||
and slot.useFinalProj is True
|
||||
):
|
||||
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
|
||||
print("[Voice Changer] ONNX Model: Official v1 like")
|
||||
elif (
|
||||
slot.embChannels == 768
|
||||
and slot.embOutputLayer == 12
|
||||
and slot.useFinalProj is False
|
||||
):
|
||||
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
|
||||
print("[Voice Changer] ONNX Model: Official v2 like")
|
||||
else:
|
||||
print(
|
||||
f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
|
||||
)
|
||||
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
|
||||
|
||||
if "embedder" not in metadata:
|
||||
slot.embedder = EnumEmbedderTypes.hubert.value
|
||||
@ -149,11 +142,9 @@ def _setInfoByONNX(slot: ModelSlot):
|
||||
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
|
||||
|
||||
slot.f0 = metadata["f0"]
|
||||
slot.modelType = (
|
||||
EnumInferenceTypes.onnxRVC.value
|
||||
if slot.f0
|
||||
else EnumInferenceTypes.onnxRVCNono.value
|
||||
)
|
||||
print("slot.modelType1", slot.modelType)
|
||||
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
|
||||
print("slot.modelType2", slot.modelType)
|
||||
slot.samplingRate = metadata["samplingRate"]
|
||||
slot.deprecated = False
|
||||
|
||||
|
@ -1,14 +1,12 @@
|
||||
import sys
|
||||
import os
|
||||
from dataclasses import asdict
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import cast
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchaudio
|
||||
from ModelSample import getModelSamples
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.SampleDownloader import downloadModelFiles
|
||||
|
||||
from data.ModelSlot import loadSlotInfo
|
||||
from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
|
||||
|
||||
# avoiding parse arg error in RVC
|
||||
sys.argv = ["MMVCServerSIO.py"]
|
||||
@ -31,7 +29,6 @@ from voice_changer.RVC.ModelSlotGenerator import (
|
||||
)
|
||||
from voice_changer.RVC.RVCSettings import RVCSettings
|
||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
|
||||
@ -40,147 +37,104 @@ from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline
|
||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||
from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
||||
|
||||
from Exceptions import DeviceCannotSupportHalfPrecisionException, NoModeLoadedException
|
||||
from Exceptions import DeviceCannotSupportHalfPrecisionException
|
||||
from const import (
|
||||
RVC_MODEL_DIRNAME,
|
||||
UPLOAD_DIR,
|
||||
getRVCSampleJsonAndModelIds,
|
||||
)
|
||||
import shutil
|
||||
import json
|
||||
|
||||
|
||||
class RVC:
|
||||
initialLoad: bool = True
|
||||
settings: RVCSettings = RVCSettings()
|
||||
# initialLoad: bool = True
|
||||
# currentSlot: int = 0
|
||||
# needSwitch: bool = False
|
||||
|
||||
pipeline: Pipeline | None = None
|
||||
|
||||
deviceManager = DeviceManager.get_instance()
|
||||
|
||||
audio_buffer: AudioInOut | None = None
|
||||
prevVol: float = 0
|
||||
params: VoiceChangerParams
|
||||
currentSlot: int = 0
|
||||
needSwitch: bool = False
|
||||
|
||||
def __init__(self, params: VoiceChangerParams):
|
||||
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(
|
||||
self.settings.f0Detector
|
||||
)
|
||||
self.params = params
|
||||
def __init__(self, slotIndex: int, params: VoiceChangerParams):
|
||||
self.params: VoiceChangerParams = params
|
||||
self.settings: RVCSettings = RVCSettings()
|
||||
self.deviceManager = DeviceManager.get_instance()
|
||||
EmbedderManager.initialize(params)
|
||||
self.loadSlots()
|
||||
print("[Voice Changer] RVC initialization: ", params)
|
||||
|
||||
# サンプルカタログ作成
|
||||
sampleJsons: list[str] = []
|
||||
sampleJsonUrls, _sampleModels = getRVCSampleJsonAndModelIds(
|
||||
params.rvc_sample_mode
|
||||
)
|
||||
for url in sampleJsonUrls:
|
||||
filename = os.path.basename(url)
|
||||
sampleJsons.append(filename)
|
||||
sampleModels = getModelSamples(sampleJsons, "RVC")
|
||||
if sampleModels is not None:
|
||||
self.settings.sampleModels = sampleModels
|
||||
self.pipeline: Pipeline | None = None
|
||||
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
|
||||
|
||||
# 起動時にスロットにモデルがある場合はロードしておく
|
||||
if len(self.settings.modelSlots) > 0:
|
||||
for i, slot in enumerate(self.settings.modelSlots):
|
||||
if len(slot.modelFile) > 0:
|
||||
self.prepareModel(i)
|
||||
self.settings.modelSlotIndex = i
|
||||
self.switchModel(self.settings.modelSlotIndex)
|
||||
self.initialLoad = False
|
||||
break
|
||||
self.audio_buffer: AudioInOut | None = None
|
||||
|
||||
print("[Voice Changer] RVC Slot initialization. global params:", params)
|
||||
|
||||
self.slotIndex = slotIndex
|
||||
self.slotInfo: RVCSlotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
|
||||
self.prevVol = 0.0
|
||||
print("[Voice Changer] RVC Slot initialization. slot info:", self.slotInfo)
|
||||
|
||||
def getSampleInfo(self, id: str):
|
||||
sampleInfos = list(filter(lambda x: x.id == id, self.settings.sampleModels))
|
||||
if len(sampleInfos) > 0:
|
||||
return sampleInfos[0]
|
||||
else:
|
||||
None
|
||||
# def loadModel(self, props: LoadModelParams):
|
||||
# target_slot_idx = props.slot
|
||||
# params = props.params
|
||||
# slotInfo: ModelSlot = ModelSlot()
|
||||
|
||||
def moveToModelDir(self, file: str, dstDir: str):
|
||||
dst = os.path.join(dstDir, os.path.basename(file))
|
||||
if os.path.exists(dst):
|
||||
os.remove(dst)
|
||||
shutil.move(file, dst)
|
||||
return dst
|
||||
# print("loadModel", params)
|
||||
# # サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
|
||||
# if len(params["sampleId"]) > 0:
|
||||
# sampleId = params["sampleId"]
|
||||
# sampleInfo = self.getSampleInfo(sampleId)
|
||||
# useIndex = params["rvcIndexDownload"]
|
||||
|
||||
def loadModel(self, props: LoadModelParams):
|
||||
target_slot_idx = props.slot
|
||||
params = props.params
|
||||
slotInfo: ModelSlot = ModelSlot()
|
||||
# if sampleInfo is None:
|
||||
# print("[Voice Changer] sampleInfo is None")
|
||||
# return
|
||||
# modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
|
||||
# slotInfo.modelFile = modelPath
|
||||
# if indexPath is not None:
|
||||
# slotInfo.indexFile = indexPath
|
||||
# if iconPath is not None:
|
||||
# slotInfo.iconFile = iconPath
|
||||
|
||||
print("loadModel", params)
|
||||
# サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
|
||||
if len(params["sampleId"]) > 0:
|
||||
sampleId = params["sampleId"]
|
||||
sampleInfo = self.getSampleInfo(sampleId)
|
||||
useIndex = params["rvcIndexDownload"]
|
||||
# slotInfo.sampleId = sampleInfo.id
|
||||
# slotInfo.credit = sampleInfo.credit
|
||||
# slotInfo.description = sampleInfo.description
|
||||
# slotInfo.name = sampleInfo.name
|
||||
# slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
|
||||
|
||||
if sampleInfo is None:
|
||||
print("[Voice Changer] sampleInfo is None")
|
||||
return
|
||||
modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
|
||||
slotInfo.modelFile = modelPath
|
||||
if indexPath is not None:
|
||||
slotInfo.indexFile = indexPath
|
||||
if iconPath is not None:
|
||||
slotInfo.iconFile = iconPath
|
||||
# # slotInfo.samplingRate = sampleInfo.sampleRate
|
||||
# # slotInfo.modelType = sampleInfo.modelType
|
||||
# # slotInfo.f0 = sampleInfo.f0
|
||||
# else:
|
||||
# slotInfo.modelFile = params["files"]["rvcModel"]
|
||||
# slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
|
||||
|
||||
slotInfo.sampleId = sampleInfo.id
|
||||
slotInfo.credit = sampleInfo.credit
|
||||
slotInfo.description = sampleInfo.description
|
||||
slotInfo.name = sampleInfo.name
|
||||
slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
|
||||
# slotInfo.defaultTune = params["defaultTune"]
|
||||
# slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
|
||||
# slotInfo.defaultProtect = params["defaultProtect"]
|
||||
# slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
|
||||
# slotInfo.samplingRate = sampleInfo.sampleRate
|
||||
# slotInfo.modelType = sampleInfo.modelType
|
||||
# slotInfo.f0 = sampleInfo.f0
|
||||
else:
|
||||
slotInfo.modelFile = params["files"]["rvcModel"]
|
||||
slotInfo.indexFile = (
|
||||
params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
|
||||
)
|
||||
# if slotInfo.isONNX:
|
||||
# _setInfoByONNX(slotInfo)
|
||||
# else:
|
||||
# _setInfoByPytorch(slotInfo)
|
||||
|
||||
slotInfo.defaultTune = params["defaultTune"]
|
||||
slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
|
||||
slotInfo.defaultProtect = params["defaultProtect"]
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
# # メタデータを見て、永続化モデルフォルダに移動させる
|
||||
# # その際に、メタデータのファイル格納場所も書き換える
|
||||
# slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx))
|
||||
# os.makedirs(slotDir, exist_ok=True)
|
||||
# slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
|
||||
# if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
|
||||
# slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
|
||||
# if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
|
||||
# slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
|
||||
# json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
||||
# self.loadSlots()
|
||||
|
||||
if slotInfo.isONNX:
|
||||
_setInfoByONNX(slotInfo)
|
||||
else:
|
||||
_setInfoByPytorch(slotInfo)
|
||||
# # 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
|
||||
# if self.initialLoad:
|
||||
# self.prepareModel(target_slot_idx)
|
||||
# self.settings.modelSlotIndex = target_slot_idx
|
||||
# self.switchModel(self.settings.modelSlotIndex)
|
||||
# self.initialLoad = False
|
||||
# elif target_slot_idx == self.currentSlot:
|
||||
# self.prepareModel(target_slot_idx)
|
||||
|
||||
# メタデータを見て、永続化モデルフォルダに移動させる
|
||||
# その際に、メタデータのファイル格納場所も書き換える
|
||||
slotDir = os.path.join(
|
||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx)
|
||||
)
|
||||
os.makedirs(slotDir, exist_ok=True)
|
||||
slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
|
||||
if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
|
||||
slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
|
||||
if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
|
||||
slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
|
||||
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
||||
self.loadSlots()
|
||||
|
||||
# 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
|
||||
if self.initialLoad:
|
||||
self.prepareModel(target_slot_idx)
|
||||
self.settings.modelSlotIndex = target_slot_idx
|
||||
self.switchModel(self.settings.modelSlotIndex)
|
||||
self.initialLoad = False
|
||||
elif target_slot_idx == self.currentSlot:
|
||||
self.prepareModel(target_slot_idx)
|
||||
|
||||
return self.get_info()
|
||||
# return self.get_info()
|
||||
|
||||
def loadSlots(self):
|
||||
dirname = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME)
|
||||
@ -189,9 +143,7 @@ class RVC:
|
||||
|
||||
modelSlots: list[ModelSlot] = []
|
||||
for slot_idx in range(len(self.settings.modelSlots)):
|
||||
slotDir = os.path.join(
|
||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx)
|
||||
)
|
||||
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx))
|
||||
jsonDict = os.path.join(slotDir, "params.json")
|
||||
if os.path.exists(jsonDict):
|
||||
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
|
||||
@ -205,76 +157,56 @@ class RVC:
|
||||
if key in self.settings.intData:
|
||||
# 設定前処理
|
||||
val = cast(int, val)
|
||||
if key == "modelSlotIndex":
|
||||
if val < 0:
|
||||
return True
|
||||
val = val % 1000 # Quick hack for same slot is selected
|
||||
if (
|
||||
self.settings.modelSlots[val].modelFile is None
|
||||
or self.settings.modelSlots[val].modelFile == ""
|
||||
):
|
||||
print("[Voice Changer] slot does not have model.")
|
||||
return True
|
||||
self.prepareModel(val)
|
||||
|
||||
# 設定
|
||||
setattr(self.settings, key, val)
|
||||
|
||||
if key == "gpu":
|
||||
self.deviceManager.setForceTensor(False)
|
||||
self.prepareModel(self.settings.modelSlotIndex)
|
||||
self.prepareModel()
|
||||
|
||||
elif key in self.settings.floatData:
|
||||
setattr(self.settings, key, float(val))
|
||||
elif key in self.settings.strData:
|
||||
setattr(self.settings, key, str(val))
|
||||
if key == "f0Detector" and self.pipeline is not None:
|
||||
pitchExtractor = PitchExtractorManager.getPitchExtractor(
|
||||
self.settings.f0Detector
|
||||
)
|
||||
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
|
||||
self.pipeline.setPitchExtractor(pitchExtractor)
|
||||
else:
|
||||
return False
|
||||
return True
|
||||
|
||||
def prepareModel(self, slot: int):
|
||||
if slot < 0:
|
||||
print("[Voice Changer] Prepare Model of slot skip:", slot)
|
||||
return self.get_info()
|
||||
modelSlot = self.settings.modelSlots[slot]
|
||||
|
||||
print("[Voice Changer] Prepare Model of slot:", slot)
|
||||
def prepareModel(self):
|
||||
print("[Voice Changer] Prepare Model of slot:", self.slotIndex)
|
||||
|
||||
# pipelineの生成
|
||||
self.next_pipeline = createPipeline(
|
||||
modelSlot, self.settings.gpu, self.settings.f0Detector
|
||||
)
|
||||
self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
|
||||
|
||||
# その他の設定
|
||||
self.next_trans = modelSlot.defaultTune
|
||||
self.next_index_ratio = modelSlot.defaultIndexRatio
|
||||
self.next_protect = modelSlot.defaultProtect
|
||||
self.next_samplingRate = modelSlot.samplingRate
|
||||
self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
|
||||
# self.needSwitch = True
|
||||
print("[Voice Changer] Prepare done.")
|
||||
self.switchModel(slot)
|
||||
# # その他の設定
|
||||
# self.next_trans = modelSlot.defaultTune
|
||||
# self.next_index_ratio = modelSlot.defaultIndexRatio
|
||||
# self.next_protect = modelSlot.defaultProtect
|
||||
# self.next_samplingRate = modelSlot.samplingRate
|
||||
# self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
|
||||
# # self.needSwitch = True
|
||||
# print("[Voice Changer] Prepare done.")
|
||||
# self.switchModel(slot)
|
||||
return self.get_info()
|
||||
|
||||
def switchModel(self, slot: int):
|
||||
print("[Voice Changer] Switching model..")
|
||||
self.pipeline = self.next_pipeline
|
||||
self.settings.tran = self.next_trans
|
||||
self.settings.indexRatio = self.next_index_ratio
|
||||
self.settings.protect = self.next_protect
|
||||
self.settings.modelSamplingRate = self.next_samplingRate
|
||||
self.settings.framework = self.next_framework
|
||||
# def switchModel(self, slot: int):
|
||||
# print("[Voice Changer] Switching model..")
|
||||
# self.pipeline = self.next_pipeline
|
||||
# self.settings.tran = self.next_trans
|
||||
# self.settings.indexRatio = self.next_index_ratio
|
||||
# self.settings.protect = self.next_protect
|
||||
# self.settings.modelSamplingRate = self.next_samplingRate
|
||||
# self.settings.framework = self.next_framework
|
||||
|
||||
# self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない
|
||||
self.currentSlot = slot
|
||||
print(
|
||||
"[Voice Changer] Switching model..done",
|
||||
)
|
||||
# # self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない
|
||||
# self.currentSlot = slot
|
||||
# print(
|
||||
# "[Voice Changer] Switching model..done",
|
||||
# )
|
||||
|
||||
def get_info(self):
|
||||
data = asdict(self.settings)
|
||||
@ -293,9 +225,7 @@ class RVC:
|
||||
crossfadeSize: int,
|
||||
solaSearchFrame: int = 0,
|
||||
):
|
||||
newData = (
|
||||
newData.astype(np.float32) / 32768.0
|
||||
) # RVCのモデルのサンプリングレートで入ってきている。(extraDataLength, Crossfade等も同じSRで処理)(★1)
|
||||
newData = newData.astype(np.float32) / 32768.0 # RVCのモデルのサンプリングレートで入ってきている。(extraDataLength, Crossfade等も同じSRで処理)(★1)
|
||||
|
||||
if self.audio_buffer is not None:
|
||||
# 過去のデータに連結
|
||||
@ -303,18 +233,14 @@ class RVC:
|
||||
else:
|
||||
self.audio_buffer = newData
|
||||
|
||||
convertSize = (
|
||||
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||
)
|
||||
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||
|
||||
if convertSize % 128 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
||||
convertSize = convertSize + (128 - (convertSize % 128))
|
||||
|
||||
# バッファがたまっていない場合はzeroで補う
|
||||
if self.audio_buffer.shape[0] < convertSize:
|
||||
self.audio_buffer = np.concatenate(
|
||||
[np.zeros([convertSize]), self.audio_buffer]
|
||||
)
|
||||
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
|
||||
|
||||
convertOffset = -1 * convertSize
|
||||
self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出
|
||||
@ -324,9 +250,7 @@ class RVC:
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
|
||||
audio_buffer = torch.from_numpy(self.audio_buffer).to(
|
||||
device=device, dtype=torch.float32
|
||||
)
|
||||
audio_buffer = torch.from_numpy(self.audio_buffer).to(device=device, dtype=torch.float32)
|
||||
|
||||
# 出力部分だけ切り出して音量を確認。(TODO:段階的消音にする)
|
||||
cropOffset = -1 * (inputSize + crossfadeSize)
|
||||
@ -339,21 +263,14 @@ class RVC:
|
||||
return (audio_buffer, convertSize, vol)
|
||||
|
||||
def inference(self, data):
|
||||
if self.settings.modelSlotIndex < 0:
|
||||
print(
|
||||
"[Voice Changer] wait for loading model...",
|
||||
self.settings.modelSlotIndex,
|
||||
self.currentSlot,
|
||||
)
|
||||
raise NoModeLoadedException("model_common")
|
||||
# if self.needSwitch:
|
||||
# if self.settings.modelSlotIndex < 0:
|
||||
# print(
|
||||
# f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}"
|
||||
# "[Voice Changer] wait for loading model...",
|
||||
# self.settings.modelSlotIndex,
|
||||
# self.currentSlot,
|
||||
# )
|
||||
# self.switchModel()
|
||||
# self.needSwitch = False
|
||||
# raise NoModeLoadedException("model_common")
|
||||
|
||||
# half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu)
|
||||
half = self.pipeline.isHalf
|
||||
|
||||
audio = data[0]
|
||||
@ -363,18 +280,16 @@ class RVC:
|
||||
if vol < self.settings.silentThreshold:
|
||||
return np.zeros(convertSize).astype(np.int16)
|
||||
|
||||
audio = torchaudio.functional.resample(
|
||||
audio, self.settings.modelSamplingRate, 16000, rolloff=0.99
|
||||
)
|
||||
audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99)
|
||||
repeat = 3 if half else 1
|
||||
repeat *= self.settings.rvcQuality # 0 or 3
|
||||
sid = 0
|
||||
f0_up_key = self.settings.tran
|
||||
index_rate = self.settings.indexRatio
|
||||
protect = self.settings.protect
|
||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
||||
embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer
|
||||
useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
|
||||
if_f0 = 1 if self.slotInfo.f0 else 0
|
||||
embOutputLayer = self.slotInfo.embOutputLayer
|
||||
useFinalProj = self.slotInfo.useFinalProj
|
||||
try:
|
||||
audio_out = self.pipeline.exec(
|
||||
sid,
|
||||
@ -382,8 +297,7 @@ class RVC:
|
||||
f0_up_key,
|
||||
index_rate,
|
||||
if_f0,
|
||||
self.settings.extraConvertSize
|
||||
/ self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
|
||||
self.settings.extraConvertSize / self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
|
||||
embOutputLayer,
|
||||
useFinalProj,
|
||||
repeat,
|
||||
@ -393,9 +307,7 @@ class RVC:
|
||||
|
||||
return result
|
||||
except DeviceCannotSupportHalfPrecisionException as e:
|
||||
print(
|
||||
"[Device Manager] Device cannot support half precision. Fallback to float...."
|
||||
)
|
||||
print("[Device Manager] Device cannot support half precision. Fallback to float....")
|
||||
self.deviceManager.setForceTensor(True)
|
||||
self.prepareModel(self.settings.modelSlotIndex)
|
||||
raise e
|
||||
@ -405,7 +317,7 @@ class RVC:
|
||||
def __del__(self):
|
||||
del self.pipeline
|
||||
|
||||
# print("---------- REMOVING ---------------")
|
||||
print("---------- REMOVING ---------------")
|
||||
|
||||
remove_path = os.path.join("RVC")
|
||||
sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
|
||||
@ -461,9 +373,7 @@ class RVC:
|
||||
"sampleId": "",
|
||||
"files": {"rvcModel": storeFile},
|
||||
}
|
||||
props: LoadModelParams = LoadModelParams(
|
||||
slot=targetSlot, isHalf=True, params=params
|
||||
)
|
||||
props: LoadModelParams = LoadModelParams(slot=targetSlot, isHalf=True, params=params)
|
||||
self.loadModel(props)
|
||||
self.prepareModel(targetSlot)
|
||||
self.settings.modelSlotIndex = targetSlot
|
||||
@ -471,12 +381,8 @@ class RVC:
|
||||
|
||||
def update_model_default(self):
|
||||
print("[Voice Changer] UPDATE MODEL DEFAULT!!")
|
||||
slotDir = os.path.join(
|
||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot)
|
||||
)
|
||||
params = json.load(
|
||||
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
|
||||
)
|
||||
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot))
|
||||
params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
|
||||
params["defaultTune"] = self.settings.tran
|
||||
params["defaultIndexRatio"] = self.settings.indexRatio
|
||||
params["defaultProtect"] = self.settings.protect
|
||||
@ -488,14 +394,10 @@ class RVC:
|
||||
print("[Voice Changer] UPDATE MODEL INFO", newData)
|
||||
newDataDict = json.loads(newData)
|
||||
try:
|
||||
slotDir = os.path.join(
|
||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"])
|
||||
)
|
||||
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"]))
|
||||
except Exception as e:
|
||||
print("Exception::::", e)
|
||||
params = json.load(
|
||||
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
|
||||
)
|
||||
params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
|
||||
params[newDataDict["key"]] = newDataDict["val"]
|
||||
json.dump(params, open(os.path.join(slotDir, "params.json"), "w"))
|
||||
self.loadSlots()
|
||||
@ -504,9 +406,7 @@ class RVC:
|
||||
print("[Voice Changer] UPLOAD ASSETS", params)
|
||||
paramsDict = json.loads(params)
|
||||
uploadPath = os.path.join(UPLOAD_DIR, paramsDict["file"])
|
||||
storeDir = os.path.join(
|
||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"])
|
||||
)
|
||||
storeDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"]))
|
||||
storePath = os.path.join(
|
||||
storeDir,
|
||||
paramsDict["file"],
|
||||
|
@ -1,8 +1,4 @@
|
||||
from dataclasses import dataclass, field
|
||||
from ModelSample import RVCModelSample
|
||||
from const import RVC_MAX_SLOT_NUM
|
||||
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -17,11 +13,6 @@ class RVCSettings:
|
||||
clusterInferRatio: float = 0.1
|
||||
|
||||
framework: str = "PyTorch" # PyTorch or ONNX
|
||||
modelSlots: list[ModelSlot] = field(
|
||||
default_factory=lambda: [ModelSlot() for _x in range(RVC_MAX_SLOT_NUM)]
|
||||
)
|
||||
|
||||
sampleModels: list[RVCModelSample] = field(default_factory=lambda: [])
|
||||
|
||||
indexRatio: float = 0
|
||||
protect: float = 0.5
|
||||
|
28
server/voice_changer/RVC/RVCSlotInfo.py
Normal file
@ -0,0 +1,28 @@
from dataclasses import dataclass

from voice_changer.VoiceChanger import SlotInfo


@dataclass
class RVCSlotInfo(SlotInfo):
    modelFile: str = ""
    indexFile: str = ""
    defaultTune: int = 0
    defaultIndexRatio: float = 0
    defaultProtect: float = 1
    isONNX: bool = False
    modelType: str = ""
    samplingRate: int = 40000
    f0: bool = True
    embChannels: int = 256
    embOutputLayer: int = 12
    useFinalProj: bool = False
    deprecated: bool = False
    embedder: str = ""

    name: str = ""
    description: str = ""
    credit: str = ""
    termsOfUseUrl: str = ""
    sampleId: str = ""
    iconFile: str = ""
|
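RVCSlotInfo doubles as the schema for each slot's params.json, so a slot can be restored by feeding the parsed JSON straight into the dataclass. A small sketch under that assumption; the real loader is data.ModelSlot.loadSlotInfo (referenced later in this commit), and load_rvc_slot plus the directory layout here are hypothetical:

import json
import os

def load_rvc_slot(model_dir: str, slot: int) -> RVCSlotInfo:
    slot_dir = os.path.join(model_dir, "rvc", str(slot))  # directory name is an assumption
    with open(os.path.join(slot_dir, "params.json"), encoding="utf-8") as f:
        params = json.load(f)
    return RVCSlotInfo(**params)  # dataclass fields act as the JSON schema

# Saving is the mirror image: json.dump(asdict(slotInfo), open(..., "w")), as done elsewhere in this commit.
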
@ -1,174 +1,52 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import asdict
|
||||
import os
|
||||
from const import RVC_MODEL_DIRNAME, TMP_DIR
|
||||
from Downloader import download, download_no_tqdm
|
||||
from ModelSample import RVCModelSample, getModelSamples
|
||||
import json
|
||||
# from concurrent.futures import ThreadPoolExecutor
|
||||
# from dataclasses import asdict
|
||||
# import os
|
||||
# from const import RVC_MODEL_DIRNAME, TMP_DIR
|
||||
# from Downloader import download, download_no_tqdm
|
||||
# from ModelSample import RVCModelSample, getModelSamples
|
||||
# import json
|
||||
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
||||
# from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
# from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
||||
|
||||
|
||||
def checkRvcModelExist(model_dir: str):
|
||||
rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME)
|
||||
if not os.path.exists(rvcModelDir):
|
||||
return False
|
||||
return True
|
||||
# def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
|
||||
# downloadParams = []
|
||||
|
||||
# modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
|
||||
# downloadParams.append(
|
||||
# {
|
||||
# "url": sampleInfo.modelUrl,
|
||||
# "saveTo": modelPath,
|
||||
# "position": 0,
|
||||
# }
|
||||
# )
|
||||
|
||||
def downloadInitialSampleModels(
|
||||
sampleJsons: list[str], sampleModelIds: list[str], model_dir: str
|
||||
):
|
||||
sampleModels = getModelSamples(sampleJsons, "RVC")
|
||||
if sampleModels is None:
|
||||
return
|
||||
# indexPath = None
|
||||
# if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
|
||||
# print("[Voice Changer] Download sample with index.")
|
||||
# indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
|
||||
# downloadParams.append(
|
||||
# {
|
||||
# "url": sampleInfo.indexUrl,
|
||||
# "saveTo": indexPath,
|
||||
# "position": 1,
|
||||
# }
|
||||
# )
|
||||
|
||||
downloadParams = []
|
||||
slot_count = 0
|
||||
line_num = 0
|
||||
for initSampleId in sampleModelIds:
|
||||
# search for the initial sample
|
||||
match = False
|
||||
for sample in sampleModels:
|
||||
if sample.id == initSampleId[0]:
|
||||
match = True
|
||||
break
|
||||
if match is False:
|
||||
print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
|
||||
continue
|
||||
# iconPath = None
|
||||
# if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
|
||||
# iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
|
||||
# downloadParams.append(
|
||||
# {
|
||||
# "url": sampleInfo.icon,
|
||||
# "saveTo": iconPath,
|
||||
# "position": 2,
|
||||
# }
|
||||
# )
|
||||
|
||||
# if a matching sample is found...
|
||||
slotInfo: ModelSlot = ModelSlot()
|
||||
# sampleParams: Any = {"files": {}}
|
||||
|
||||
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
|
||||
os.makedirs(slotDir, exist_ok=True)
|
||||
modelFilePath = os.path.join(
|
||||
slotDir,
|
||||
os.path.basename(sample.modelUrl),
|
||||
)
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sample.modelUrl,
|
||||
"saveTo": modelFilePath,
|
||||
"position": line_num,
|
||||
}
|
||||
)
|
||||
slotInfo.modelFile = modelFilePath
|
||||
line_num += 1
|
||||
|
||||
if (
|
||||
initSampleId[1] is True
|
||||
and hasattr(sample, "indexUrl")
|
||||
and sample.indexUrl != ""
|
||||
):
|
||||
indexPath = os.path.join(
|
||||
slotDir,
|
||||
os.path.basename(sample.indexUrl),
|
||||
)
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sample.indexUrl,
|
||||
"saveTo": indexPath,
|
||||
"position": line_num,
|
||||
}
|
||||
)
|
||||
slotInfo.indexFile = indexPath
|
||||
line_num += 1
|
||||
if hasattr(sample, "icon") and sample.icon != "":
|
||||
iconPath = os.path.join(
|
||||
slotDir,
|
||||
os.path.basename(sample.icon),
|
||||
)
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sample.icon,
|
||||
"saveTo": iconPath,
|
||||
"position": line_num,
|
||||
}
|
||||
)
|
||||
slotInfo.iconFile = iconPath
|
||||
line_num += 1
|
||||
|
||||
slotInfo.sampleId = sample.id
|
||||
slotInfo.credit = sample.credit
|
||||
slotInfo.description = sample.description
|
||||
slotInfo.name = sample.name
|
||||
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
|
||||
slotInfo.defaultTune = 0
|
||||
slotInfo.defaultIndexRatio = 1
|
||||
slotInfo.defaultProtect = 0.5
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
|
||||
# the files have not been downloaded yet at this point
|
||||
# if slotInfo.isONNX:
|
||||
# _setInfoByONNX(slotInfo)
|
||||
# else:
|
||||
# _setInfoByPytorch(slotInfo)
|
||||
|
||||
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
||||
slot_count += 1
|
||||
|
||||
# download
|
||||
print("[Voice Changer] Downloading model files...")
|
||||
with ThreadPoolExecutor() as pool:
|
||||
pool.map(download, downloadParams)
|
||||
|
||||
# generate metadata
|
||||
print("[Voice Changer] Generating metadata...")
|
||||
for slotId in range(slot_count):
|
||||
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
|
||||
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
|
||||
slotInfo = ModelSlot(**jsonDict)
|
||||
if slotInfo.isONNX:
|
||||
_setInfoByONNX(slotInfo)
|
||||
else:
|
||||
_setInfoByPytorch(slotInfo)
|
||||
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
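
downloadInitialSampleModels above queues one {"url", "saveTo", "position"} dict per file and fans them out with ThreadPoolExecutor.map(download, ...), then fills in model metadata once the files exist. A minimal sketch of that download pattern with a stand-in downloader; the real download / download_no_tqdm live in Downloader.py (the "position" field is presumably used there for progress display), and fake_download plus the URLs below are placeholders:

import os
import urllib.request
from concurrent.futures import ThreadPoolExecutor

def fake_download(param: dict):  # stand-in for Downloader.download
    os.makedirs(os.path.dirname(param["saveTo"]), exist_ok=True)
    urllib.request.urlretrieve(param["url"], param["saveTo"])

download_params = [
    {"url": "https://example.com/model.pth", "saveTo": "model_dir/rvc/0/model.pth", "position": 0},
    {"url": "https://example.com/model.index", "saveTo": "model_dir/rvc/0/model.index", "position": 1},
]
with ThreadPoolExecutor() as pool:
    list(pool.map(fake_download, download_params))  # materialize the iterator so exceptions surface
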
|
||||
|
||||
|
||||
def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
|
||||
downloadParams = []
|
||||
|
||||
modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sampleInfo.modelUrl,
|
||||
"saveTo": modelPath,
|
||||
"position": 0,
|
||||
}
|
||||
)
|
||||
|
||||
indexPath = None
|
||||
if (
|
||||
useIndex is True
|
||||
and hasattr(sampleInfo, "indexUrl")
|
||||
and sampleInfo.indexUrl != ""
|
||||
):
|
||||
print("[Voice Changer] Download sample with index.")
|
||||
indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sampleInfo.indexUrl,
|
||||
"saveTo": indexPath,
|
||||
"position": 1,
|
||||
}
|
||||
)
|
||||
|
||||
iconPath = None
|
||||
if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
|
||||
iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
|
||||
downloadParams.append(
|
||||
{
|
||||
"url": sampleInfo.icon,
|
||||
"saveTo": iconPath,
|
||||
"position": 2,
|
||||
}
|
||||
)
|
||||
|
||||
print("[Voice Changer] Downloading model files...", end="")
|
||||
with ThreadPoolExecutor() as pool:
|
||||
pool.map(download_no_tqdm, downloadParams)
|
||||
print("")
|
||||
return modelPath, indexPath, iconPath
|
||||
# print("[Voice Changer] Downloading model files...", end="")
|
||||
# with ThreadPoolExecutor() as pool:
|
||||
# pool.map(download_no_tqdm, downloadParams)
|
||||
# print("")
|
||||
# return modelPath, indexPath, iconPath
|
||||
|
@ -14,7 +14,7 @@ class RVCInferencer(Inferencer):
|
||||
|
||||
dev = DeviceManager.get_instance().getDevice(gpu)
|
||||
isHalf = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
|
||||
|
||||
print("LLLLLLLLLLLLLLLOOOOOOOOOOOOOOOOOOOOOOO", file)
|
||||
cpt = torch.load(file, map_location="cpu")
|
||||
model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)
|
||||
|
||||
|
@ -4,7 +4,7 @@ import torch
|
||||
from onnxsim import simplify
|
||||
import onnx
|
||||
from const import TMP_DIR, EnumInferenceTypes
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from data.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
|
||||
SynthesizerTrnMs256NSFsid_ONNX,
|
||||
@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
|
||||
modelFile = modelSlot.modelFile
|
||||
|
||||
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
|
||||
output_file_simple = (
|
||||
os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
|
||||
)
|
||||
output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
|
||||
output_path = os.path.join(TMP_DIR, output_file)
|
||||
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
|
||||
metadata = {
|
||||
@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
|
||||
if gpuMomory > 0:
|
||||
_export2onnx(modelFile, output_path, output_path_simple, True, metadata)
|
||||
else:
|
||||
print(
|
||||
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
|
||||
)
|
||||
print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
|
||||
_export2onnx(modelFile, output_path, output_path_simple, False, metadata)
|
||||
return output_file_simple
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
import os
|
||||
import traceback
|
||||
import faiss
|
||||
from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
|
||||
|
||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
|
||||
@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
||||
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
|
||||
|
||||
|
||||
def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
||||
def createPipeline(slotInfo: RVCSlotInfo, gpu: int, f0Detector: str):
|
||||
dev = DeviceManager.get_instance().getDevice(gpu)
|
||||
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
|
||||
|
||||
# create the Inferencer
|
||||
try:
|
||||
inferencer = InferencerManager.getInferencer(
|
||||
modelSlot.modelType, modelSlot.modelFile, gpu
|
||||
)
|
||||
inferencer = InferencerManager.getInferencer(slotInfo.modelType, slotInfo.modelFile, gpu)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] exception! loading inferencer", e)
|
||||
traceback.print_exc()
|
||||
@ -26,7 +24,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
||||
# create the Embedder
|
||||
try:
|
||||
embedder = EmbedderManager.getEmbedder(
|
||||
modelSlot.embedder,
|
||||
slotInfo.embedder,
|
||||
# emmbedderFilename,
|
||||
half,
|
||||
dev,
|
||||
@ -39,14 +37,14 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
||||
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector)
|
||||
|
||||
# index, feature
|
||||
index = _loadIndex(modelSlot)
|
||||
index = _loadIndex(slotInfo)
|
||||
|
||||
pipeline = Pipeline(
|
||||
embedder,
|
||||
inferencer,
|
||||
pitchExtractor,
|
||||
index,
|
||||
modelSlot.samplingRate,
|
||||
slotInfo.samplingRate,
|
||||
dev,
|
||||
half,
|
||||
)
|
||||
@ -54,21 +52,21 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
||||
return pipeline
|
||||
|
||||
|
||||
def _loadIndex(modelSlot: ModelSlot):
|
||||
def _loadIndex(slotInfo: RVCSlotInfo):
|
||||
# load the index
|
||||
print("[Voice Changer] Loading index...")
|
||||
# None when no file is specified
|
||||
if modelSlot.indexFile is None:
|
||||
if slotInfo.indexFile is None:
|
||||
print("[Voice Changer] Index is None, not used")
|
||||
return None
|
||||
|
||||
# None when a file is specified but does not exist
|
||||
if os.path.exists(modelSlot.indexFile) is not True:
|
||||
if os.path.exists(slotInfo.indexFile) is not True:
|
||||
return None
|
||||
|
||||
try:
|
||||
print("Try loading...", modelSlot.indexFile)
|
||||
index = faiss.read_index(modelSlot.indexFile)
|
||||
print("Try loading...", slotInfo.indexFile)
|
||||
index = faiss.read_index(slotInfo.indexFile)
|
||||
except:
|
||||
print("[Voice Changer] load index failed. Use no index.")
|
||||
traceback.print_exc()
|
||||
|
@ -28,7 +28,7 @@ class DioPitchExtractor(PitchExtractor):
|
||||
f0_floor=f0_min,
|
||||
f0_ceil=f0_max,
|
||||
channels_in_octave=2,
|
||||
frame_period=10,
|
||||
frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
|
||||
)
|
||||
f0 = pyworld.stonemask(audio.astype(np.double), _f0, t, sr)
|
||||
f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame))
|
||||
|
@ -27,7 +27,7 @@ class HarvestPitchExtractor(PitchExtractor):
|
||||
audio.astype(np.double),
|
||||
fs=sr,
|
||||
f0_ceil=f0_max,
|
||||
frame_period=10,
|
||||
frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
|
||||
)
|
||||
f0 = pyworld.stonemask(audio.astype(np.double), f0, t, sr)
|
||||
f0 = signal.medfilt(f0, 3)
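
Both pitch extractors above fix frame_period at 10 ms so that pyworld's analysis hop lines up with the 160-sample feature hop at 16 kHz mentioned in the comment. The arithmetic, for reference:

# frame_period is in milliseconds; a 160-sample hop at 16 kHz is exactly 10 ms.
window = 160        # hop size in samples (from the comment above)
samplerate = 16000  # Hz
frame_period_ms = window / samplerate * 1000
assert frame_period_ms == 10.0
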
|
||||
|
@ -1,21 +1,21 @@
|
||||
from typing import Any, Union, cast
|
||||
|
||||
import socketio
|
||||
from const import TMP_DIR, ModelType
|
||||
from const import TMP_DIR, VoiceChangerType
|
||||
import torch
|
||||
import os
|
||||
import traceback
|
||||
import numpy as np
|
||||
from dataclasses import dataclass, asdict, field
|
||||
import resampy
|
||||
from data.ModelSlot import loadSlotInfo
|
||||
|
||||
|
||||
from voice_changer.IORecorder import IORecorder
|
||||
from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
|
||||
from voice_changer.utils.Timer import Timer
|
||||
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
from Exceptions import (
|
||||
DeviceCannotSupportHalfPrecisionException,
|
||||
DeviceChangingException,
|
||||
@ -26,15 +26,22 @@ from Exceptions import (
|
||||
VoiceChangerIsNotSelectedException,
|
||||
)
|
||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||
import threading
|
||||
import time
|
||||
import sounddevice as sd
|
||||
import librosa
|
||||
|
||||
# import threading
|
||||
# import time
|
||||
# import sounddevice as sd
|
||||
# import librosa
|
||||
import json
|
||||
|
||||
STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
|
||||
STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
|
||||
|
||||
|
||||
@dataclass
|
||||
class SlotInfo:
|
||||
voiceChangerType: VoiceChangerType | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class VoiceChangerSettings:
|
||||
inputSampleRate: int = 48000 # 48000 or 24000
|
||||
@ -45,9 +52,7 @@ class VoiceChangerSettings:
|
||||
|
||||
recordIO: int = 0 # 0:off, 1:on
|
||||
serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
|
||||
serverAudioOutputDevices: list[ServerAudioDevice] = field(
|
||||
default_factory=lambda: []
|
||||
)
|
||||
serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
|
||||
|
||||
enableServerAudio: int = 0 # 0:off, 1:on
|
||||
serverAudioStated: int = 0 # 0:off, 1:on
|
||||
@ -93,160 +98,131 @@ class VoiceChangerSettings:
|
||||
|
||||
|
||||
class VoiceChanger:
|
||||
settings: VoiceChangerSettings = VoiceChangerSettings()
|
||||
voiceChanger: VoiceChangerModel | None = None
|
||||
ioRecorder: IORecorder
|
||||
sola_buffer: AudioInOut
|
||||
namespace: socketio.AsyncNamespace | None = None
|
||||
# settings: VoiceChangerSettings = VoiceChangerSettings()
|
||||
# voiceChangerModel: VoiceChangerModel | None = None
|
||||
#
|
||||
#
|
||||
# namespace: socketio.AsyncNamespace | None = None
|
||||
|
||||
localPerformanceShowTime = 0.0
|
||||
# localPerformanceShowTime = 0.0
|
||||
|
||||
emitTo = None
|
||||
# emitTo = None
|
||||
|
||||
def audio_callback(
|
||||
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
|
||||
):
|
||||
try:
|
||||
indata = indata * self.settings.serverInputAudioGain
|
||||
with Timer("all_inference_time") as t:
|
||||
unpackedData = librosa.to_mono(indata.T) * 32768.0
|
||||
out_wav, times = self.on_request(unpackedData)
|
||||
outputChunnels = outdata.shape[1]
|
||||
outdata[:] = (
|
||||
np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
|
||||
/ 32768.0
|
||||
)
|
||||
outdata[:] = outdata * self.settings.serverOutputAudioGain
|
||||
all_inference_time = t.secs
|
||||
performance = [all_inference_time] + times
|
||||
if self.emitTo is not None:
|
||||
self.emitTo(performance)
|
||||
self.settings.performance = [round(x * 1000) for x in performance]
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ex:", e)
|
||||
# def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
|
||||
# try:
|
||||
# indata = indata * self.settings.serverInputAudioGain
|
||||
# with Timer("all_inference_time") as t:
|
||||
# unpackedData = librosa.to_mono(indata.T) * 32768.0
|
||||
# out_wav, times = self.on_request(unpackedData)
|
||||
# outputChunnels = outdata.shape[1]
|
||||
# outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
|
||||
# outdata[:] = outdata * self.settings.serverOutputAudioGain
|
||||
# all_inference_time = t.secs
|
||||
# performance = [all_inference_time] + times
|
||||
# if self.emitTo is not None:
|
||||
# self.emitTo(performance)
|
||||
# self.settings.performance = [round(x * 1000) for x in performance]
|
||||
# except Exception as e:
|
||||
# print("[Voice Changer] ex:", e)
|
||||
|
||||
def getServerAudioDevice(
|
||||
self, audioDeviceList: list[ServerAudioDevice], index: int
|
||||
):
|
||||
serverAudioDevice = [x for x in audioDeviceList if x.index == index]
|
||||
if len(serverAudioDevice) > 0:
|
||||
return serverAudioDevice[0]
|
||||
else:
|
||||
return None
|
||||
# def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
|
||||
# serverAudioDevice = [x for x in audioDeviceList if x.index == index]
|
||||
# if len(serverAudioDevice) > 0:
|
||||
# return serverAudioDevice[0]
|
||||
# else:
|
||||
# return None
|
||||
|
||||
def serverLocal(self, _vc):
|
||||
vc: VoiceChanger = _vc
|
||||
# def serverLocal(self, _vc):
|
||||
# vc: VoiceChanger = _vc
|
||||
|
||||
currentInputDeviceId = -1
|
||||
currentModelSamplingRate = -1
|
||||
currentOutputDeviceId = -1
|
||||
currentInputChunkNum = -1
|
||||
while True:
|
||||
if (
|
||||
vc.settings.serverAudioStated == 0
|
||||
or vc.settings.serverInputDeviceId == -1
|
||||
or vc.voiceChanger is None
|
||||
):
|
||||
vc.settings.inputSampleRate = 48000
|
||||
time.sleep(2)
|
||||
else:
|
||||
sd._terminate()
|
||||
sd._initialize()
|
||||
# currentInputDeviceId = -1
|
||||
# currentModelSamplingRate = -1
|
||||
# currentOutputDeviceId = -1
|
||||
# currentInputChunkNum = -1
|
||||
# while True:
|
||||
# if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
|
||||
# vc.settings.inputSampleRate = 48000
|
||||
# time.sleep(2)
|
||||
# else:
|
||||
# sd._terminate()
|
||||
# sd._initialize()
|
||||
|
||||
sd.default.device[0] = vc.settings.serverInputDeviceId
|
||||
currentInputDeviceId = vc.settings.serverInputDeviceId
|
||||
sd.default.device[1] = vc.settings.serverOutputDeviceId
|
||||
currentOutputDeviceId = vc.settings.serverOutputDeviceId
|
||||
# sd.default.device[0] = vc.settings.serverInputDeviceId
|
||||
# currentInputDeviceId = vc.settings.serverInputDeviceId
|
||||
# sd.default.device[1] = vc.settings.serverOutputDeviceId
|
||||
# currentOutputDeviceId = vc.settings.serverOutputDeviceId
|
||||
|
||||
currentInputChannelNum = vc.settings.serverAudioInputDevices
|
||||
# currentInputChannelNum = vc.settings.serverAudioInputDevices
|
||||
|
||||
serverInputAudioDevice = self.getServerAudioDevice(
|
||||
vc.settings.serverAudioInputDevices, currentInputDeviceId
|
||||
)
|
||||
serverOutputAudioDevice = self.getServerAudioDevice(
|
||||
vc.settings.serverAudioOutputDevices, currentOutputDeviceId
|
||||
)
|
||||
print(serverInputAudioDevice, serverOutputAudioDevice)
|
||||
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
|
||||
time.sleep(2)
|
||||
print("serverInputAudioDevice or serverOutputAudioDevice is None")
|
||||
continue
|
||||
# serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
|
||||
# serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
|
||||
# print(serverInputAudioDevice, serverOutputAudioDevice)
|
||||
# if serverInputAudioDevice is None or serverOutputAudioDevice is None:
|
||||
# time.sleep(2)
|
||||
# print("serverInputAudioDevice or serverOutputAudioDevice is None")
|
||||
# continue
|
||||
|
||||
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
|
||||
currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
|
||||
# currentInputChannelNum = serverInputAudioDevice.maxInputChannels
|
||||
# currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
|
||||
|
||||
currentInputChunkNum = vc.settings.serverReadChunkSize
|
||||
block_frame = currentInputChunkNum * 128
|
||||
# currentInputChunkNum = vc.settings.serverReadChunkSize
|
||||
# block_frame = currentInputChunkNum * 128
|
||||
|
||||
# sample rate precheck(alsa cannot use 40000?)
|
||||
try:
|
||||
currentModelSamplingRate = (
|
||||
self.voiceChanger.get_processing_sampling_rate()
|
||||
)
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ex: get_processing_sampling_rate", e)
|
||||
continue
|
||||
try:
|
||||
with sd.Stream(
|
||||
callback=self.audio_callback,
|
||||
blocksize=block_frame,
|
||||
samplerate=currentModelSamplingRate,
|
||||
dtype="float32",
|
||||
channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||
):
|
||||
pass
|
||||
vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
|
||||
vc.settings.inputSampleRate = currentModelSamplingRate
|
||||
print(
|
||||
f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}"
|
||||
)
|
||||
except Exception as e:
|
||||
print(
|
||||
"[Voice Changer] ex: fallback to device default samplerate",
|
||||
e,
|
||||
)
|
||||
vc.settings.serverInputAudioSampleRate = (
|
||||
serverInputAudioDevice.default_samplerate
|
||||
)
|
||||
vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
|
||||
# # sample rate precheck(alsa cannot use 40000?)
|
||||
# try:
|
||||
# currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
|
||||
# except Exception as e:
|
||||
# print("[Voice Changer] ex: get_processing_sampling_rate", e)
|
||||
# continue
|
||||
# try:
|
||||
# with sd.Stream(
|
||||
# callback=self.audio_callback,
|
||||
# blocksize=block_frame,
|
||||
# samplerate=currentModelSamplingRate,
|
||||
# dtype="float32",
|
||||
# channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||
# ):
|
||||
# pass
|
||||
# vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
|
||||
# vc.settings.inputSampleRate = currentModelSamplingRate
|
||||
# print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
|
||||
# except Exception as e:
|
||||
# print(
|
||||
# "[Voice Changer] ex: fallback to device default samplerate",
|
||||
# e,
|
||||
# )
|
||||
# vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
|
||||
# vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
|
||||
|
||||
# main loop
|
||||
try:
|
||||
with sd.Stream(
|
||||
callback=self.audio_callback,
|
||||
blocksize=block_frame,
|
||||
samplerate=vc.settings.serverInputAudioSampleRate,
|
||||
dtype="float32",
|
||||
channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||
):
|
||||
while (
|
||||
vc.settings.serverAudioStated == 1
|
||||
and currentInputDeviceId == vc.settings.serverInputDeviceId
|
||||
and currentOutputDeviceId
|
||||
== vc.settings.serverOutputDeviceId
|
||||
and currentModelSamplingRate
|
||||
== self.voiceChanger.get_processing_sampling_rate()
|
||||
and currentInputChunkNum == vc.settings.serverReadChunkSize
|
||||
):
|
||||
time.sleep(2)
|
||||
print(
|
||||
"[Voice Changer] server audio",
|
||||
self.settings.performance,
|
||||
)
|
||||
print(
|
||||
"[Voice Changer] info:",
|
||||
vc.settings.serverAudioStated,
|
||||
currentInputDeviceId,
|
||||
currentOutputDeviceId,
|
||||
vc.settings.serverInputAudioSampleRate,
|
||||
currentInputChunkNum,
|
||||
)
|
||||
# # main loop
|
||||
# try:
|
||||
# with sd.Stream(
|
||||
# callback=self.audio_callback,
|
||||
# blocksize=block_frame,
|
||||
# samplerate=vc.settings.serverInputAudioSampleRate,
|
||||
# dtype="float32",
|
||||
# channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||
# ):
|
||||
# while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
|
||||
# time.sleep(2)
|
||||
# print(
|
||||
# "[Voice Changer] server audio",
|
||||
# self.settings.performance,
|
||||
# )
|
||||
# print(
|
||||
# "[Voice Changer] info:",
|
||||
# vc.settings.serverAudioStated,
|
||||
# currentInputDeviceId,
|
||||
# currentOutputDeviceId,
|
||||
# vc.settings.serverInputAudioSampleRate,
|
||||
# currentInputChunkNum,
|
||||
# )
|
||||
|
||||
except Exception as e:
|
||||
print("[Voice Changer] ex:", e)
|
||||
time.sleep(2)
|
||||
# except Exception as e:
|
||||
# print("[Voice Changer] ex:", e)
|
||||
# time.sleep(2)
|
||||
|
||||
def __init__(self, params: VoiceChangerParams):
|
||||
def __init__(self, params: VoiceChangerParams, slotIndex: int):
|
||||
# initialization
|
||||
self.settings = VoiceChangerSettings()
|
||||
self.onnx_session = None
|
||||
@ -255,147 +231,80 @@ class VoiceChanger:
|
||||
self.currentCrossFadeOverlapSize = 0 # setting
|
||||
self.crossfadeSize = 0 # calculated
|
||||
|
||||
self.voiceChanger = None
|
||||
self.modelType: ModelType | None = None
|
||||
self.voiceChangerModel = None
|
||||
self.modelType: VoiceChangerType | None = None
|
||||
self.params = params
|
||||
self.gpu_num = torch.cuda.device_count()
|
||||
self.prev_audio = np.zeros(4096)
|
||||
self.mps_enabled: bool = (
|
||||
getattr(torch.backends, "mps", None) is not None
|
||||
and torch.backends.mps.is_available()
|
||||
)
|
||||
self.ioRecorder: IORecorder | None = None
|
||||
self.sola_buffer: AudioInOut | None = None
|
||||
|
||||
audioinput, audiooutput = list_audio_device()
|
||||
self.settings.serverAudioInputDevices = audioinput
|
||||
self.settings.serverAudioOutputDevices = audiooutput
|
||||
|
||||
thread = threading.Thread(target=self.serverLocal, args=(self,))
|
||||
thread.start()
|
||||
print(
|
||||
f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
|
||||
)
|
||||
|
||||
def switchModelType(self, modelType: ModelType):
|
||||
try:
|
||||
if self.voiceChanger is not None:
|
||||
# return {"status": "ERROR", "msg": "vc is already selected. currently re-select is not implemented"}
|
||||
del self.voiceChanger
|
||||
self.voiceChanger = None
|
||||
|
||||
self.modelType = modelType
|
||||
if self.modelType == "MMVCv15":
|
||||
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
|
||||
|
||||
self.voiceChanger = MMVCv15() # type: ignore
|
||||
elif self.modelType == "MMVCv13":
|
||||
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
||||
|
||||
self.voiceChanger = MMVCv13()
|
||||
elif self.modelType == "so-vits-svc-40v2":
|
||||
from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
|
||||
|
||||
self.voiceChanger = SoVitsSvc40v2(self.params)
|
||||
elif (
|
||||
self.modelType == "so-vits-svc-40"
|
||||
or self.modelType == "so-vits-svc-40_c"
|
||||
):
|
||||
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
|
||||
|
||||
self.voiceChanger = SoVitsSvc40(self.params)
|
||||
elif self.modelType == "DDSP-SVC":
|
||||
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
|
||||
|
||||
self.voiceChanger = DDSP_SVC(self.params)
|
||||
elif self.modelType == "RVC":
|
||||
self.slotIndex = slotIndex
|
||||
self.slotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
|
||||
if self.slotInfo.voiceChangerType is None:
|
||||
print(f"[Voice Changer] Voice Changer Type is None for slot {slotIndex} is not found.")
|
||||
return
|
||||
elif self.slotInfo.voiceChangerType == "RVC":
|
||||
from voice_changer.RVC.RVC import RVC
|
||||
|
||||
self.voiceChanger = RVC(self.params)
|
||||
self.voiceChangerModel = RVC(self.slotIndex, self.params)
|
||||
else:
|
||||
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
||||
print(f"[Voice Changer] unknwon voice changer type. {self.slotInfo.voiceChangerType}")
|
||||
|
||||
self.voiceChanger = MMVCv13()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print(traceback.format_exc())
|
||||
return {"status": "OK", "msg": "vc is switched."}
|
||||
# thread = threading.Thread(target=self.serverLocal, args=(self,))
|
||||
# thread.start()
|
||||
|
||||
def getModelType(self):
|
||||
if self.modelType is not None:
|
||||
return {"status": "OK", "vc": self.modelType}
|
||||
else:
|
||||
return {"status": "OK", "vc": "none"}
|
||||
|
||||
def loadModel(self, props: LoadModelParams):
|
||||
try:
|
||||
if self.voiceChanger is None:
|
||||
raise VoiceChangerIsNotSelectedException(
|
||||
"Voice Changer is not selected."
|
||||
)
|
||||
return self.voiceChanger.loadModel(props)
|
||||
except Exception as e:
|
||||
print(traceback.format_exc())
|
||||
print("[Voice Changer] Model Load Error! Check your model is valid.", e)
|
||||
return {"status": "NG"}
|
||||
def prepareModel(self):
|
||||
self.voiceChangerModel.prepareModel()
|
||||
|
||||
def get_info(self):
|
||||
data = asdict(self.settings)
|
||||
if self.voiceChanger is not None:
|
||||
data.update(self.voiceChanger.get_info())
|
||||
if self.voiceChangerModel is not None:
|
||||
data.update(self.voiceChangerModel.get_info())
|
||||
|
||||
devCount = torch.cuda.device_count()
|
||||
gpus = []
|
||||
for id in range(devCount):
|
||||
name = torch.cuda.get_device_name(id)
|
||||
memory = torch.cuda.get_device_properties(id).total_memory
|
||||
gpu = {"id": id, "name": name, "memory": memory}
|
||||
gpus.append(gpu)
|
||||
data["gpus"] = gpus
|
||||
return data
|
||||
|
||||
def get_performance(self):
|
||||
return self.settings.performance
|
||||
|
||||
def update_settings(self, key: str, val: Any):
|
||||
if self.voiceChanger is None:
|
||||
if self.voiceChangerModel is None:
|
||||
print("[Voice Changer] Voice Changer is not selected.")
|
||||
return self.get_info()
|
||||
return
|
||||
|
||||
if key in self.settings.intData:
|
||||
setattr(self.settings, key, int(val))
|
||||
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
|
||||
self.crossfadeSize = 0
|
||||
if key == "recordIO" and val == 1:
|
||||
if hasattr(self, "ioRecorder"):
|
||||
if self.ioRecorder is not None:
|
||||
self.ioRecorder.close()
|
||||
self.ioRecorder = IORecorder(
|
||||
STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
|
||||
)
|
||||
self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
|
||||
if key == "recordIO" and val == 0:
|
||||
if hasattr(self, "ioRecorder"):
|
||||
if self.ioRecorder is not None:
|
||||
self.ioRecorder.close()
|
||||
self.ioRecorder = None
|
||||
pass
|
||||
if key == "recordIO" and val == 2:
|
||||
if hasattr(self, "ioRecorder"):
|
||||
if self.ioRecorder is not None:
|
||||
self.ioRecorder.close()
|
||||
self.ioRecorder = None
|
||||
|
||||
elif key in self.settings.floatData:
|
||||
setattr(self.settings, key, float(val))
|
||||
elif key in self.settings.strData:
|
||||
setattr(self.settings, key, str(val))
|
||||
else:
|
||||
ret = self.voiceChanger.update_settings(key, val)
|
||||
ret = self.voiceChangerModel.update_settings(key, val)
|
||||
if ret is False:
|
||||
pass
|
||||
# print(f"({key} is not mutable variable or unknown variable)")
|
||||
return self.get_info()
|
||||
|
||||
def _generate_strength(self, crossfadeSize: int):
|
||||
if (
|
||||
self.crossfadeSize != crossfadeSize
|
||||
or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
|
||||
or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
|
||||
or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
|
||||
):
|
||||
if self.crossfadeSize != crossfadeSize or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
|
||||
self.crossfadeSize = crossfadeSize
|
||||
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
|
||||
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
|
||||
@ -424,32 +333,25 @@ class VoiceChanger:
|
||||
]
|
||||
)
|
||||
|
||||
print(
|
||||
f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
|
||||
)
|
||||
print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")
|
||||
|
||||
# the size differs from the previous result, so clear the stored buffers.
|
||||
if hasattr(self, "np_prev_audio1") is True:
|
||||
delattr(self, "np_prev_audio1")
|
||||
if hasattr(self, "sola_buffer") is True:
|
||||
if self.sola_buffer is not None:
|
||||
del self.sola_buffer
|
||||
self.sola_buffer = None
|
||||
|
||||
# receivedData: tuple of short
|
||||
def on_request(
|
||||
self, receivedData: AudioInOut
|
||||
) -> tuple[AudioInOut, list[Union[int, float]]]:
|
||||
def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
|
||||
return self.on_request_sola(receivedData)
|
||||
|
||||
def on_request_sola(
|
||||
self, receivedData: AudioInOut
|
||||
) -> tuple[AudioInOut, list[Union[int, float]]]:
|
||||
def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
|
||||
try:
|
||||
if self.voiceChanger is None:
|
||||
raise VoiceChangerIsNotSelectedException(
|
||||
"Voice Changer is not selected."
|
||||
)
|
||||
if self.voiceChangerModel is None:
|
||||
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
|
||||
|
||||
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
|
||||
processing_sampling_rate = self.voiceChangerModel.get_processing_sampling_rate()
|
||||
# pre-processing
|
||||
with Timer("pre-process") as t:
|
||||
if self.settings.inputSampleRate != processing_sampling_rate:
|
||||
@ -470,21 +372,17 @@ class VoiceChanger:
|
||||
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
|
||||
self._generate_strength(crossfade_frame)
|
||||
|
||||
data = self.voiceChanger.generate_input(
|
||||
newData, block_frame, crossfade_frame, sola_search_frame
|
||||
)
|
||||
data = self.voiceChangerModel.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
|
||||
preprocess_time = t.secs
|
||||
|
||||
# conversion
|
||||
with Timer("main-process") as t:
|
||||
# Inference
|
||||
audio = self.voiceChanger.inference(data)
|
||||
audio = self.voiceChangerModel.inference(data)
|
||||
|
||||
if hasattr(self, "sola_buffer") is True:
|
||||
if self.sola_buffer is not None:
|
||||
np.set_printoptions(threshold=10000)
|
||||
audio_offset = -1 * (
|
||||
sola_search_frame + crossfade_frame + block_frame
|
||||
)
|
||||
audio_offset = -1 * (sola_search_frame + crossfade_frame + block_frame)
|
||||
audio = audio[audio_offset:]
|
||||
|
||||
# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
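
The SOLA search itself is not shown in this hunk; it slides the saved tail of the previous output (sola_buffer) across the first sola_search_frame samples of the new chunk and picks the offset with the highest normalized correlation, so the crossfade starts where the two waveforms already agree. A simplified sketch of that search, assuming 1-D float arrays (the referenced DDSP-SVC / RVC-WebUI code does the equivalent with tensor convolutions):

import numpy as np

def sola_offset(audio: np.ndarray, sola_buffer: np.ndarray, sola_search_frame: int) -> int:
    """Return the offset in [0, sola_search_frame) where sola_buffer best matches the new audio."""
    n = len(sola_buffer)
    cor = [
        np.dot(audio[i:i + n], sola_buffer) / (np.linalg.norm(audio[i:i + n]) * np.linalg.norm(sola_buffer) + 1e-8)
        for i in range(sola_search_frame)
    ]
    return int(np.argmax(cor))
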
|
||||
@ -512,10 +410,7 @@ class VoiceChanger:
|
||||
print("[Voice Changer] warming up... generating sola buffer.")
|
||||
result = np.zeros(4096).astype(np.int16)
|
||||
|
||||
if (
|
||||
hasattr(self, "sola_buffer") is True
|
||||
and sola_offset < sola_search_frame
|
||||
):
|
||||
if self.sola_buffer is not None and sola_offset < sola_search_frame:
|
||||
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
|
||||
end = -1 * (sola_search_frame - sola_offset)
|
||||
sola_buf_org = audio[offset:end]
|
||||
@ -545,9 +440,7 @@ class VoiceChanger:
|
||||
else:
|
||||
outputData = result
|
||||
|
||||
print_convert_processing(
|
||||
f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
|
||||
)
|
||||
print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
|
||||
|
||||
if receivedData.shape[0] != outputData.shape[0]:
|
||||
# print(
|
||||
@ -564,9 +457,7 @@ class VoiceChanger:
|
||||
|
||||
postprocess_time = t.secs
|
||||
|
||||
print_convert_processing(
|
||||
f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
|
||||
)
|
||||
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
|
||||
perf = [preprocess_time, mainprocess_time, postprocess_time]
|
||||
return outputData, perf
|
||||
|
||||
@ -586,9 +477,7 @@ class VoiceChanger:
|
||||
print("[Voice Changer] embedder:", e)
|
||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||
except VoiceChangerIsNotSelectedException:
|
||||
print(
|
||||
"[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc."
|
||||
)
|
||||
print("[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc.")
|
||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||
except DeviceCannotSupportHalfPrecisionException:
|
||||
# RVC.py performs the fallback, so just return dummy data here.
|
||||
|
@ -1,35 +1,105 @@
|
||||
import numpy as np
|
||||
import threading
|
||||
from data.ModelSample import ModelSamples
|
||||
from data.ModelSlot import ModelSlots, loadSlotInfo
|
||||
from utils.downloader.SampleDownloader import downloadSample, getSampleInfos
|
||||
from voice_changer.Local.ServerDevice import ServerDevice
|
||||
from voice_changer.RVC.ModelSlotGenerator import setSlotAsRVC
|
||||
|
||||
from voice_changer.VoiceChanger import VoiceChanger
|
||||
from const import ModelType
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from const import MAX_SLOT_NUM, VoiceChangerType
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||
|
||||
from dataclasses import dataclass, asdict, field
|
||||
import torch
|
||||
import json
|
||||
|
||||
|
||||
@dataclass()
|
||||
class GPUInfo:
|
||||
id: int
|
||||
name: str
|
||||
memory: int
|
||||
|
||||
|
||||
@dataclass()
|
||||
class VoiceChangerManagerSettings:
|
||||
slotIndex: int
|
||||
intData: list[str] = field(default_factory=lambda: ["slotIndex"])
|
||||
|
||||
|
||||
class VoiceChangerManager(object):
|
||||
_instance = None
|
||||
voiceChanger: VoiceChanger = None
|
||||
|
||||
def __init__(self, params: VoiceChangerParams):
|
||||
self.voiceChanger: VoiceChanger = None
|
||||
self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings(slotIndex=0)
|
||||
self.params: VoiceChangerParams = params
|
||||
|
||||
self.serverDevice = ServerDevice()
|
||||
|
||||
# collect static information
|
||||
self.sampleModels: list[ModelSamples] = getSampleInfos(self.params.sample_mode)
|
||||
self.gpus: list[GPUInfo] = self._get_gpuInfos()
|
||||
|
||||
def _get_gpuInfos(self):
|
||||
devCount = torch.cuda.device_count()
|
||||
gpus = []
|
||||
for id in range(devCount):
|
||||
name = torch.cuda.get_device_name(id)
|
||||
memory = torch.cuda.get_device_properties(id).total_memory
|
||||
gpu = {"id": id, "name": name, "memory": memory}
|
||||
gpus.append(gpu)
|
||||
return gpus
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls, params: VoiceChangerParams):
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
cls._instance.voiceChanger = VoiceChanger(params)
|
||||
cls._instance = cls(params)
|
||||
|
||||
gpu_num = torch.cuda.device_count()
|
||||
mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
||||
print(f"VoiceChanger Initialized (GPU_NUM:{gpu_num}, mps_enabled:{mps_enabled})")
|
||||
|
||||
cls._instance.voiceChanger = VoiceChanger(params, cls._instance.settings.slotIndex)
|
||||
thread = threading.Thread(target=cls._instance.serverDevice.serverLocal, args=(cls._instance.voiceChanger,))
|
||||
thread.start()
|
||||
cls._instance.voiceChanger.prepareModel()
|
||||
return cls._instance
|
||||
|
||||
def loadModel(self, props: LoadModelParams):
|
||||
info = self.voiceChanger.loadModel(props)
|
||||
if hasattr(info, "status") and info["status"] == "NG":
|
||||
return info
|
||||
def loadModel(self, slot: int, voiceChangerType: VoiceChangerType, params: str):
|
||||
print(slot, voiceChangerType, params)
|
||||
paramDict = json.loads(params)
|
||||
if voiceChangerType == "RVC":
|
||||
if "sampleId" in paramDict and len(paramDict["sampleId"]) > 0:
|
||||
print("[Voice Canger]: Download RVC sample.")
|
||||
downloadSample(self.params.sample_mode, paramDict["sampleId"], self.params.model_dir, slot, {"useIndex": paramDict["rvcIndexDownload"]})
|
||||
else:
|
||||
info["status"] = "OK"
|
||||
return info
|
||||
print("[Voice Canger]: Set uploaded RVC model to slot.")
|
||||
setSlotAsRVC(self.params.model_dir, slot, paramDict)
|
||||
|
||||
return self.get_info()
|
||||
|
||||
def get_slotInfos(self):
|
||||
slotInfos: list[ModelSlots] = []
|
||||
for slotIndex in range(MAX_SLOT_NUM):
|
||||
slotInfo = loadSlotInfo(self.params.model_dir, slotIndex)
|
||||
slotInfos.append(slotInfo)
|
||||
return slotInfos
|
||||
|
||||
def get_info(self):
|
||||
data = asdict(self.settings)
|
||||
slotInfos = self.get_slotInfos()
|
||||
data["slotInfos"] = slotInfos
|
||||
data["gpus"] = self.gpus
|
||||
data["sampleModels"] = self.sampleModels
|
||||
|
||||
data["status"] = "OK"
|
||||
if hasattr(self, "voiceChanger"):
|
||||
info = self.voiceChanger.get_info()
|
||||
info["status"] = "OK"
|
||||
return info
|
||||
data.update(info)
|
||||
return data
|
||||
else:
|
||||
return {"status": "ERROR", "msg": "no model loaded"}
|
||||
|
||||
@ -41,12 +111,22 @@ class VoiceChangerManager(object):
|
||||
return {"status": "ERROR", "msg": "no model loaded"}
|
||||
|
||||
def update_settings(self, key: str, val: str | int | float):
|
||||
if hasattr(self, "voiceChanger"):
|
||||
info = self.voiceChanger.update_settings(key, val)
|
||||
info["status"] = "OK"
|
||||
return info
|
||||
if key in self.settings.intData:
|
||||
setattr(self.settings, key, int(val))
|
||||
if key == "slotIndex":
|
||||
val = val % 1000  # Quick hack for when the same slot is selected again
|
||||
setattr(self.settings, key, int(val))
|
||||
|
||||
newVoiceChanger = VoiceChanger(self.params, self.settings.slotIndex)
|
||||
newVoiceChanger.prepareModel()
|
||||
self.serverDevice.serverLocal(newVoiceChanger)
|
||||
del self.voiceChanger
|
||||
self.voiceChanger = newVoiceChanger
|
||||
elif hasattr(self, "voiceChanger"):
|
||||
self.voiceChanger.update_settings(key, val)
|
||||
else:
|
||||
return {"status": "ERROR", "msg": "no model loaded"}
|
||||
print(f"[Voice Changer] update is not handled. ({key}:{val})")
|
||||
return self.get_info()
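
A note on the slotIndex branch above: the incoming value is reduced with val % 1000 before the voice changer is rebuilt. The comment suggests the client offsets the index to force an update when the currently selected slot is chosen again; that reading is an assumption, not stated elsewhere in this diff:

# Assumption: the client may send slotIndex + 1000 * n to re-trigger selection of the
# current slot; the modulo recovers the real slot index either way.
for raw in (3, 1003, 2003):
    print(raw % 1000)  # -> 3 each time, so slot 3 is (re)loaded
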
|
||||
|
||||
def changeVoice(self, receivedData: AudioInOut):
|
||||
if hasattr(self, "voiceChanger") is True:
|
||||
@ -55,12 +135,6 @@ class VoiceChangerManager(object):
|
||||
print("Voice Change is not loaded. Did you load a correct model?")
|
||||
return np.zeros(1).astype(np.int16), []
|
||||
|
||||
def switchModelType(self, modelType: ModelType):
|
||||
return self.voiceChanger.switchModelType(modelType)
|
||||
|
||||
def getModelType(self):
|
||||
return self.voiceChanger.getModelType()
|
||||
|
||||
def export2onnx(self):
|
||||
return self.voiceChanger.export2onnx()
|
||||
|
||||
|
@ -10,10 +10,3 @@ class FilePaths:
|
||||
clusterTorchModelFilename: str | None
|
||||
featureFilename: str | None
|
||||
indexFilename: str | None
|
||||
|
||||
|
||||
@dataclass
|
||||
class LoadModelParams:
|
||||
slot: int
|
||||
isHalf: bool
|
||||
params: Any
|
||||
|
@ -1,17 +1,11 @@
|
||||
from typing import Any, Protocol, TypeAlias
|
||||
import numpy as np
|
||||
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
|
||||
|
||||
AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
|
||||
|
||||
|
||||
class VoiceChangerModel(Protocol):
|
||||
# loadModel: Callable[..., dict[str, Any]]
|
||||
def loadModel(self, params: LoadModelParams):
|
||||
...
|
||||
|
||||
def get_processing_sampling_rate(self) -> int:
|
||||
...
|
||||
|
||||
@ -21,9 +15,7 @@ class VoiceChangerModel(Protocol):
|
||||
def inference(self, data: tuple[Any, ...]) -> Any:
|
||||
...
|
||||
|
||||
def generate_input(
|
||||
self, newData: AudioInOut, inputSize: int, crossfadeSize: int
|
||||
) -> tuple[Any, ...]:
|
||||
def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]:
|
||||
...
|
||||
|
||||
def update_settings(self, key: str, val: Any) -> bool:
|
||||
|
@ -11,4 +11,4 @@ class VoiceChangerParams:
|
||||
hubert_base_jp: str
|
||||
hubert_soft: str
|
||||
nsf_hifigan: str
|
||||
rvc_sample_mode: str
|
||||
sample_mode: str
|
||||
|