mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00
WIP: refactoring
This commit is contained in:
parent
328ea46161
commit
be42bb682d
3
.gitignore
vendored
3
.gitignore
vendored
@ -43,12 +43,15 @@ docker/cudnn/
|
|||||||
server/pretrain/
|
server/pretrain/
|
||||||
server/weights/
|
server/weights/
|
||||||
server/model_dir/
|
server/model_dir/
|
||||||
|
server/model_dir2/
|
||||||
server/weights_/
|
server/weights_/
|
||||||
server/weights__/
|
server/weights__/
|
||||||
server/models/
|
server/models/
|
||||||
server/samples.json
|
server/samples.json
|
||||||
server/samples_0003_t.json
|
server/samples_0003_t.json
|
||||||
server/samples_0003_o.json
|
server/samples_0003_o.json
|
||||||
|
server/samples_0003_o2.json
|
||||||
|
server/samples_0003_t2.json
|
||||||
|
|
||||||
server/test_official_v1_v2.json
|
server/test_official_v1_v2.json
|
||||||
server/test_ddpn_v1_v2.json
|
server/test_ddpn_v1_v2.json
|
||||||
|
11
client/demo/dist/index.html
vendored
11
client/demo/dist/index.html
vendored
@ -1 +1,10 @@
|
|||||||
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
|
<!DOCTYPE html>
|
||||||
|
<html style="width: 100%; height: 100%; overflow: hidden">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Voice Changer Client Demo</title>
|
||||||
|
<script defer src="index.js"></script></head>
|
||||||
|
<body style="width: 100%; height: 100%; margin: 0px">
|
||||||
|
<div id="app" style="width: 100%; height: 100%"></div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
1904
client/demo/dist/index.js
vendored
1904
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
31
client/demo/dist/index.js.LICENSE.txt
vendored
31
client/demo/dist/index.js.LICENSE.txt
vendored
@ -1,31 +0,0 @@
|
|||||||
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @license React
|
|
||||||
* react-dom.production.min.js
|
|
||||||
*
|
|
||||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
||||||
*
|
|
||||||
* This source code is licensed under the MIT license found in the
|
|
||||||
* LICENSE file in the root directory of this source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @license React
|
|
||||||
* react.production.min.js
|
|
||||||
*
|
|
||||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
||||||
*
|
|
||||||
* This source code is licensed under the MIT license found in the
|
|
||||||
* LICENSE file in the root directory of this source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @license React
|
|
||||||
* scheduler.production.min.js
|
|
||||||
*
|
|
||||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
||||||
*
|
|
||||||
* This source code is licensed under the MIT license found in the
|
|
||||||
* LICENSE file in the root directory of this source tree.
|
|
||||||
*/
|
|
@ -27,7 +27,6 @@ export const useMessageBuilder_old = (): MessageBuilderStateAndMethod => {
|
|||||||
lang = "en"
|
lang = "en"
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(messagesRef.current)
|
|
||||||
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
|
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
|
@ -39,7 +39,7 @@ export const ModelSlotManagerDialog = () => {
|
|||||||
if (mode != "localFile") {
|
if (mode != "localFile") {
|
||||||
return <></>
|
return <></>
|
||||||
}
|
}
|
||||||
if (!serverSetting.serverSetting.modelSlots) {
|
if (!serverSetting.serverSetting.slotInfos) {
|
||||||
return <></>
|
return <></>
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,7 +114,7 @@ export const ModelSlotManagerDialog = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const slots = serverSetting.serverSetting.modelSlots.map((x, index) => {
|
const slots = serverSetting.serverSetting.slotInfos.map((x, index) => {
|
||||||
let modelFileName = ""
|
let modelFileName = ""
|
||||||
if (uploadData?.slot == index) {
|
if (uploadData?.slot == index) {
|
||||||
modelFileName = (uploadData.model?.name || "").replace(/^.*[\\\/]/, '')
|
modelFileName = (uploadData.model?.name || "").replace(/^.*[\\\/]/, '')
|
||||||
@ -248,7 +248,7 @@ export const ModelSlotManagerDialog = () => {
|
|||||||
|
|
||||||
}, [
|
}, [
|
||||||
mode,
|
mode,
|
||||||
serverSetting.serverSetting.modelSlots,
|
serverSetting.serverSetting.slotInfos,
|
||||||
serverSetting.fileUploadSettings,
|
serverSetting.fileUploadSettings,
|
||||||
serverSetting.uploadProgress,
|
serverSetting.uploadProgress,
|
||||||
serverSetting.setFileUploadSetting,
|
serverSetting.setFileUploadSetting,
|
||||||
|
@ -13,19 +13,22 @@ export const MergeLabDialog = () => {
|
|||||||
|
|
||||||
// スロットが変更されたときの初期化処理
|
// スロットが変更されたときの初期化処理
|
||||||
const newSlotChangeKey = useMemo(() => {
|
const newSlotChangeKey = useMemo(() => {
|
||||||
if (!serverSetting.serverSetting.modelSlots) {
|
if (!serverSetting.serverSetting.slotInfos) {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => {
|
return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
|
||||||
return prev + "_" + cur.modelFile
|
return prev + "_" + cur.modelFile
|
||||||
}, "")
|
}, "")
|
||||||
}, [serverSetting.serverSetting.modelSlots])
|
}, [serverSetting.serverSetting.slotInfos])
|
||||||
|
|
||||||
const filterItems = useMemo(() => {
|
const filterItems = useMemo(() => {
|
||||||
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => {
|
return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
|
||||||
const key = `${cur.modelType},${cur.samplingRate},${cur.embChannels}`
|
const key = `${cur.modelType},${cur.samplingRate},${cur.embChannels}`
|
||||||
const val = { type: cur.modelType, samplingRate: cur.samplingRate, embChannels: cur.embChannels }
|
const val = { type: cur.modelType, samplingRate: cur.samplingRate, embChannels: cur.embChannels }
|
||||||
const existKeys = Object.keys(prev)
|
const existKeys = Object.keys(prev)
|
||||||
|
if (cur.voiceChangerType == null) {
|
||||||
|
return prev
|
||||||
|
}
|
||||||
if (cur.modelFile.length == 0) {
|
if (cur.modelFile.length == 0) {
|
||||||
return prev
|
return prev
|
||||||
}
|
}
|
||||||
@ -41,7 +44,7 @@ export const MergeLabDialog = () => {
|
|||||||
}, [newSlotChangeKey])
|
}, [newSlotChangeKey])
|
||||||
|
|
||||||
const models = useMemo(() => {
|
const models = useMemo(() => {
|
||||||
return serverSetting.serverSetting.modelSlots.filter(x => {
|
return serverSetting.serverSetting.slotInfos.filter(x => {
|
||||||
const filterVals = filterItems[currentFilter]
|
const filterVals = filterItems[currentFilter]
|
||||||
if (!filterVals) {
|
if (!filterVals) {
|
||||||
return false
|
return false
|
||||||
|
@ -9,11 +9,11 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
|
|||||||
const appState = useAppState()
|
const appState = useAppState()
|
||||||
|
|
||||||
const modelSwitchRow = useMemo(() => {
|
const modelSwitchRow = useMemo(() => {
|
||||||
const slot = appState.serverSetting.serverSetting.modelSlotIndex
|
const slot = appState.serverSetting.serverSetting.slotIndex
|
||||||
const onSwitchModelClicked = async (slot: number) => {
|
const onSwitchModelClicked = async (slot: number) => {
|
||||||
// Quick hack for same slot is selected. 下3桁が実際のSlotID
|
// Quick hack for same slot is selected. 下3桁が実際のSlotID
|
||||||
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + slot
|
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + slot
|
||||||
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex })
|
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
|
||||||
setTimeout(() => { // quick hack
|
setTimeout(() => { // quick hack
|
||||||
appState.getInfo()
|
appState.getInfo()
|
||||||
}, 1000 * 2)
|
}, 1000 * 2)
|
||||||
@ -24,7 +24,7 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const modelSlots = appState.serverSetting.serverSetting.modelSlots
|
const modelSlots = appState.serverSetting.serverSetting.slotInfos
|
||||||
let options: React.JSX.Element[] = []
|
let options: React.JSX.Element[] = []
|
||||||
if (modelSlots) {
|
if (modelSlots) {
|
||||||
options = modelSlots.map((x, index) => {
|
options = modelSlots.map((x, index) => {
|
||||||
|
@ -18,14 +18,17 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
|
|||||||
|
|
||||||
|
|
||||||
const modelTiles = useMemo(() => {
|
const modelTiles = useMemo(() => {
|
||||||
if (!serverSetting.serverSetting.modelSlots) {
|
if (!serverSetting.serverSetting.slotInfos) {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
return serverSetting.serverSetting.modelSlots.map((x, index) => {
|
return serverSetting.serverSetting.slotInfos.map((x, index) => {
|
||||||
|
if (x.voiceChangerType == null) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
if (x.modelFile.length == 0) {
|
if (x.modelFile.length == 0) {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
const tileContainerClass = index == serverSetting.serverSetting.modelSlotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container"
|
const tileContainerClass = index == serverSetting.serverSetting.slotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container"
|
||||||
const name = x.name.length > 8 ? x.name.substring(0, 7) + "..." : x.name
|
const name = x.name.length > 8 ? x.name.substring(0, 7) + "..." : x.name
|
||||||
const iconElem = x.iconFile.length > 0 ?
|
const iconElem = x.iconFile.length > 0 ?
|
||||||
<img className="model-slot-tile-icon" src={x.iconFile} alt={x.name} /> :
|
<img className="model-slot-tile-icon" src={x.iconFile} alt={x.name} /> :
|
||||||
@ -33,7 +36,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
|
|||||||
|
|
||||||
const clickAction = async () => {
|
const clickAction = async () => {
|
||||||
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + index
|
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + index
|
||||||
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex })
|
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
|
||||||
setTimeout(() => { // quick hack
|
setTimeout(() => { // quick hack
|
||||||
getInfo()
|
getInfo()
|
||||||
}, 1000 * 2)
|
}, 1000 * 2)
|
||||||
@ -50,7 +53,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
|
|||||||
</div >
|
</div >
|
||||||
)
|
)
|
||||||
}).filter(x => x != null)
|
}).filter(x => x != null)
|
||||||
}, [serverSetting.serverSetting.modelSlots, serverSetting.serverSetting.modelSlotIndex])
|
}, [serverSetting.serverSetting.slotInfos, serverSetting.serverSetting.slotIndex])
|
||||||
|
|
||||||
|
|
||||||
const modelSlotArea = useMemo(() => {
|
const modelSlotArea = useMemo(() => {
|
||||||
|
@ -21,11 +21,12 @@ export const CharacterArea = (_props: CharacterAreaProps) => {
|
|||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
const selected = useMemo(() => {
|
const selected = useMemo(() => {
|
||||||
if (serverSetting.serverSetting.modelSlotIndex == undefined) {
|
console.log("serverSetting.serverSetting.slotInfos::", serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos)
|
||||||
|
if (serverSetting.serverSetting.slotIndex == undefined) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
return serverSetting.serverSetting.modelSlots[serverSetting.serverSetting.modelSlotIndex]
|
return serverSetting.serverSetting.slotInfos[serverSetting.serverSetting.slotIndex]
|
||||||
}, [serverSetting.serverSetting.modelSlotIndex, serverSetting.serverSetting.modelSlots])
|
}, [serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos])
|
||||||
|
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
@ -27,7 +27,6 @@ export const useMessageBuilder = (): MessageBuilderStateAndMethod => {
|
|||||||
lang = "en"
|
lang = "en"
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(messagesRef.current)
|
|
||||||
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
|
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey } from "./const";
|
import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey, VoiceChangerType } from "./const";
|
||||||
|
|
||||||
|
|
||||||
type FileChunk = {
|
type FileChunk = {
|
||||||
@ -166,16 +166,12 @@ export class ServerConfigurator {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
loadModel = async (slot: number, isHalf: boolean, params: string = "{}") => {
|
loadModel = async (slot: number, voiceChangerType: VoiceChangerType, params: string = "{}") => {
|
||||||
if (isHalf == undefined || isHalf == null) {
|
|
||||||
console.warn("isHalf is invalid value", isHalf)
|
|
||||||
isHalf = false
|
|
||||||
}
|
|
||||||
const url = this.serverUrl + "/load_model"
|
const url = this.serverUrl + "/load_model"
|
||||||
const info = new Promise<ServerInfo>(async (resolve) => {
|
const info = new Promise<ServerInfo>(async (resolve) => {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append("slot", "" + slot);
|
formData.append("slot", "" + slot);
|
||||||
formData.append("isHalf", "" + isHalf);
|
formData.append("voiceChangerType", voiceChangerType);
|
||||||
formData.append("params", params);
|
formData.append("params", params);
|
||||||
|
|
||||||
const request = new Request(url, {
|
const request = new Request(url, {
|
||||||
|
@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
|
|||||||
import workerjs from "raw-loader!../worklet/dist/index.js";
|
import workerjs from "raw-loader!../worklet/dist/index.js";
|
||||||
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
|
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
|
||||||
import { createDummyMediaStream, validateUrl } from "./util";
|
import { createDummyMediaStream, validateUrl } from "./util";
|
||||||
import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, WorkletNodeSetting, WorkletSetting } from "./const";
|
import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, VoiceChangerType, WorkletNodeSetting, WorkletSetting } from "./const";
|
||||||
import { ServerConfigurator } from "./ServerConfigurator";
|
import { ServerConfigurator } from "./ServerConfigurator";
|
||||||
|
|
||||||
// オーディオデータの流れ
|
// オーディオデータの流れ
|
||||||
@ -298,10 +298,10 @@ export class VoiceChangerClient {
|
|||||||
}
|
}
|
||||||
loadModel = (
|
loadModel = (
|
||||||
slot: number,
|
slot: number,
|
||||||
isHalf: boolean,
|
voiceChangerType: VoiceChangerType,
|
||||||
params: string,
|
params: string,
|
||||||
) => {
|
) => {
|
||||||
return this.configurator.loadModel(slot, isHalf, params)
|
return this.configurator.loadModel(slot, voiceChangerType, params)
|
||||||
}
|
}
|
||||||
uploadAssets = (params: string) => {
|
uploadAssets = (params: string) => {
|
||||||
return this.configurator.uploadAssets(params)
|
return this.configurator.uploadAssets(params)
|
||||||
|
@ -16,6 +16,16 @@ export const ClientType = {
|
|||||||
} as const
|
} as const
|
||||||
export type ClientType = typeof ClientType[keyof typeof ClientType]
|
export type ClientType = typeof ClientType[keyof typeof ClientType]
|
||||||
|
|
||||||
|
export const VoiceChangerType = {
|
||||||
|
"MMVCv15": "MMVCv15",
|
||||||
|
"MMVCv13": "MMVCv13",
|
||||||
|
"so-vits-svc-40": "so-vits-svc-40",
|
||||||
|
"DDSP-SVC": "DDSP-SVC",
|
||||||
|
"RVC": "RVC"
|
||||||
|
} as const
|
||||||
|
export type VoiceChangerType = typeof VoiceChangerType[keyof typeof VoiceChangerType]
|
||||||
|
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// サーバセッティング
|
// サーバセッティング
|
||||||
///////////////////////
|
///////////////////////
|
||||||
@ -122,7 +132,8 @@ export const ServerSettingKey = {
|
|||||||
"rvcQuality": "rvcQuality",
|
"rvcQuality": "rvcQuality",
|
||||||
"modelSamplingRate": "modelSamplingRate",
|
"modelSamplingRate": "modelSamplingRate",
|
||||||
"silenceFront": "silenceFront",
|
"silenceFront": "silenceFront",
|
||||||
"modelSlotIndex": "modelSlotIndex",
|
// "modelSlotIndex": "modelSlotIndex",
|
||||||
|
"slotIndex": "slotIndex",
|
||||||
|
|
||||||
"useEnhancer": "useEnhancer",
|
"useEnhancer": "useEnhancer",
|
||||||
"useDiff": "useDiff",
|
"useDiff": "useDiff",
|
||||||
@ -181,7 +192,8 @@ export type VoiceChangerServerSetting = {
|
|||||||
rvcQuality: number // 0:low, 1:high
|
rvcQuality: number // 0:low, 1:high
|
||||||
silenceFront: number // 0:off, 1:on
|
silenceFront: number // 0:off, 1:on
|
||||||
modelSamplingRate: ModelSamplingRate // 32000,40000,48000
|
modelSamplingRate: ModelSamplingRate // 32000,40000,48000
|
||||||
modelSlotIndex: number,
|
// modelSlotIndex: number,
|
||||||
|
slotIndex: number,
|
||||||
|
|
||||||
useEnhancer: number// DDSP-SVC
|
useEnhancer: number// DDSP-SVC
|
||||||
useDiff: number// DDSP-SVC
|
useDiff: number// DDSP-SVC
|
||||||
@ -197,7 +209,31 @@ export type VoiceChangerServerSetting = {
|
|||||||
enableDirectML: number
|
enableDirectML: number
|
||||||
}
|
}
|
||||||
|
|
||||||
type ModelSlot = {
|
// type ModelSlot = {
|
||||||
|
// modelFile: string
|
||||||
|
// featureFile: string,
|
||||||
|
// indexFile: string,
|
||||||
|
|
||||||
|
// defaultTune: number,
|
||||||
|
// defaultIndexRatio: number,
|
||||||
|
// defaultProtect: number,
|
||||||
|
|
||||||
|
// modelType: ModelType,
|
||||||
|
// embChannels: number,
|
||||||
|
// f0: boolean,
|
||||||
|
// samplingRate: number
|
||||||
|
// deprecated: boolean
|
||||||
|
|
||||||
|
|
||||||
|
// name: string,
|
||||||
|
// description: string,
|
||||||
|
// credit: string,
|
||||||
|
// termsOfUseUrl: string,
|
||||||
|
// iconFile: string
|
||||||
|
// }
|
||||||
|
|
||||||
|
type SlotInfo = {
|
||||||
|
voiceChangerType: VoiceChangerType | null
|
||||||
modelFile: string
|
modelFile: string
|
||||||
featureFile: string,
|
featureFile: string,
|
||||||
indexFile: string,
|
indexFile: string,
|
||||||
@ -233,7 +269,8 @@ export type ServerInfo = VoiceChangerServerSetting & {
|
|||||||
pyTorchModelFile: string,
|
pyTorchModelFile: string,
|
||||||
onnxModelFile: string,
|
onnxModelFile: string,
|
||||||
onnxExecutionProviders: OnnxExecutionProvider[]
|
onnxExecutionProviders: OnnxExecutionProvider[]
|
||||||
modelSlots: ModelSlot[]
|
// modelSlots: ModelSlot[]
|
||||||
|
slotInfos: SlotInfo[]
|
||||||
serverAudioInputDevices: ServerAudioDevice[]
|
serverAudioInputDevices: ServerAudioDevice[]
|
||||||
serverAudioOutputDevices: ServerAudioDevice[]
|
serverAudioOutputDevices: ServerAudioDevice[]
|
||||||
sampleModels: RVCSampleModel[]
|
sampleModels: RVCSampleModel[]
|
||||||
@ -311,7 +348,8 @@ export const DefaultServerSetting: ServerInfo = {
|
|||||||
rvcQuality: 0,
|
rvcQuality: 0,
|
||||||
modelSamplingRate: 48000,
|
modelSamplingRate: 48000,
|
||||||
silenceFront: 1,
|
silenceFront: 1,
|
||||||
modelSlotIndex: 0,
|
// modelSlotIndex: 0,
|
||||||
|
slotIndex: 0,
|
||||||
sampleModels: [],
|
sampleModels: [],
|
||||||
gpus: [],
|
gpus: [],
|
||||||
|
|
||||||
@ -331,7 +369,8 @@ export const DefaultServerSetting: ServerInfo = {
|
|||||||
pyTorchModelFile: "",
|
pyTorchModelFile: "",
|
||||||
onnxModelFile: "",
|
onnxModelFile: "",
|
||||||
onnxExecutionProviders: [],
|
onnxExecutionProviders: [],
|
||||||
modelSlots: [],
|
// modelSlots: [],
|
||||||
|
slotInfos: [],
|
||||||
serverAudioInputDevices: [],
|
serverAudioInputDevices: [],
|
||||||
serverAudioOutputDevices: []
|
serverAudioOutputDevices: []
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import { useState, useMemo, useEffect } from "react"
|
import { useState, useMemo, useEffect } from "react"
|
||||||
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, Framework, MergeModelRequest } from "../const"
|
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, MergeModelRequest, VoiceChangerType } from "../const"
|
||||||
import { VoiceChangerClient } from "../VoiceChangerClient"
|
import { VoiceChangerClient } from "../VoiceChangerClient"
|
||||||
import { useIndexedDB } from "./useIndexedDB"
|
import { useIndexedDB } from "./useIndexedDB"
|
||||||
import { ModelLoadException } from "../exceptions"
|
import { ModelLoadException } from "../exceptions"
|
||||||
@ -18,12 +18,7 @@ export type ModelAssetName = typeof ModelAssetName[keyof typeof ModelAssetName]
|
|||||||
|
|
||||||
|
|
||||||
export type FileUploadSetting = {
|
export type FileUploadSetting = {
|
||||||
isHalf: boolean
|
|
||||||
uploaded: boolean
|
uploaded: boolean
|
||||||
defaultTune: number
|
|
||||||
defaultIndexRatio: number
|
|
||||||
defaultProtect: number
|
|
||||||
framework: Framework
|
|
||||||
params: string
|
params: string
|
||||||
|
|
||||||
mmvcv13Config: ModelData | null
|
mmvcv13Config: ModelData | null
|
||||||
@ -52,12 +47,7 @@ export type FileUploadSetting = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const InitialFileUploadSetting: FileUploadSetting = {
|
export const InitialFileUploadSetting: FileUploadSetting = {
|
||||||
isHalf: true,
|
|
||||||
uploaded: false,
|
uploaded: false,
|
||||||
defaultTune: 0,
|
|
||||||
defaultIndexRatio: 1,
|
|
||||||
defaultProtect: 0.5,
|
|
||||||
framework: Framework.PyTorch,
|
|
||||||
params: "{}",
|
params: "{}",
|
||||||
|
|
||||||
mmvcv13Config: null,
|
mmvcv13Config: null,
|
||||||
@ -400,43 +390,39 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
}
|
}
|
||||||
|
|
||||||
// const configFileName = fileUploadSetting.configFile?.filename || "-"
|
// const configFileName = fileUploadSetting.configFile?.filename || "-"
|
||||||
|
const files: { kind: string, name: string }[] = []
|
||||||
|
if (fileUploadSetting.mmvcv13Config?.filename) files.push({ kind: "mmvcv13Config", name: fileUploadSetting.mmvcv13Config.filename })
|
||||||
|
if (fileUploadSetting.mmvcv13Model?.filename) files.push({ kind: "mmvcv13Model", name: fileUploadSetting.mmvcv13Model.filename })
|
||||||
|
|
||||||
|
if (fileUploadSetting.mmvcv15Config?.filename) files.push({ kind: "mmvcv15Config", name: fileUploadSetting.mmvcv15Config.filename })
|
||||||
|
if (fileUploadSetting.mmvcv15Model?.filename) files.push({ kind: "mmvcv15Model", name: fileUploadSetting.mmvcv15Model.filename })
|
||||||
|
|
||||||
|
if (fileUploadSetting.soVitsSvc40Config?.filename) files.push({ kind: "soVitsSvc40Config", name: fileUploadSetting.soVitsSvc40Config.filename })
|
||||||
|
if (fileUploadSetting.soVitsSvc40Model?.filename) files.push({ kind: "soVitsSvc40Model", name: fileUploadSetting.soVitsSvc40Model.filename })
|
||||||
|
if (fileUploadSetting.soVitsSvc40Cluster?.filename) files.push({ kind: "soVitsSvc40Cluster", name: fileUploadSetting.soVitsSvc40Cluster.filename })
|
||||||
|
|
||||||
|
if (fileUploadSetting.rvcModel?.filename) files.push({ kind: "rvcModel", name: fileUploadSetting.rvcModel.filename })
|
||||||
|
if (fileUploadSetting.rvcIndex?.filename) files.push({ kind: "rvcIndex", name: fileUploadSetting.rvcIndex.filename })
|
||||||
|
|
||||||
|
if (fileUploadSetting.ddspSvcModel?.filename) files.push({ kind: "ddspSvcModel", name: fileUploadSetting.ddspSvcModel.filename })
|
||||||
|
if (fileUploadSetting.ddspSvcModelConfig?.filename) files.push({ kind: "ddspSvcModelConfig", name: fileUploadSetting.ddspSvcModelConfig.filename })
|
||||||
|
if (fileUploadSetting.ddspSvcDiffusion?.filename) files.push({ kind: "ddspSvcDiffusion", name: fileUploadSetting.ddspSvcDiffusion.filename })
|
||||||
|
if (fileUploadSetting.ddspSvcDiffusionConfig?.filename) files.push({ kind: "ddspSvcDiffusionConfig", name: fileUploadSetting.ddspSvcDiffusionConfig.filename })
|
||||||
|
|
||||||
|
|
||||||
const params = JSON.stringify({
|
const params = JSON.stringify({
|
||||||
defaultTune: fileUploadSetting.defaultTune || 0,
|
|
||||||
defaultIndexRatio: fileUploadSetting.defaultIndexRatio || 1,
|
|
||||||
defaultProtect: fileUploadSetting.defaultProtect || 0.5,
|
|
||||||
sampleId: fileUploadSetting.isSampleMode ? fileUploadSetting.sampleId || "" : "",
|
sampleId: fileUploadSetting.isSampleMode ? fileUploadSetting.sampleId || "" : "",
|
||||||
rvcIndexDownload: fileUploadSetting.rvcIndexDownload || false,
|
rvcIndexDownload: fileUploadSetting.rvcIndexDownload || false,
|
||||||
files: fileUploadSetting.isSampleMode ? {} : {
|
files: fileUploadSetting.isSampleMode ? [] : files
|
||||||
mmvcv13Config: fileUploadSetting.mmvcv13Config?.filename || "",
|
|
||||||
mmvcv13Model: fileUploadSetting.mmvcv13Model?.filename || "",
|
|
||||||
mmvcv15Config: fileUploadSetting.mmvcv15Config?.filename || "",
|
|
||||||
mmvcv15Model: fileUploadSetting.mmvcv15Model?.filename || "",
|
|
||||||
soVitsSvc40Config: fileUploadSetting.soVitsSvc40Config?.filename || "",
|
|
||||||
soVitsSvc40Model: fileUploadSetting.soVitsSvc40Model?.filename || "",
|
|
||||||
soVitsSvc40Cluster: fileUploadSetting.soVitsSvc40Cluster?.filename || "",
|
|
||||||
soVitsSvc40v2Config: fileUploadSetting.soVitsSvc40v2Config?.filename || "",
|
|
||||||
soVitsSvc40v2Model: fileUploadSetting.soVitsSvc40v2Model?.filename || "",
|
|
||||||
soVitsSvc40v2Cluster: fileUploadSetting.soVitsSvc40v2Cluster?.filename || "",
|
|
||||||
rvcModel: fileUploadSetting.rvcModel?.filename || "",
|
|
||||||
rvcIndex: fileUploadSetting.rvcIndex?.filename || "",
|
|
||||||
rvcFeature: fileUploadSetting.rvcFeature?.filename || "",
|
|
||||||
|
|
||||||
ddspSvcModel: fileUploadSetting.ddspSvcModel?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModel?.filename : "",
|
|
||||||
ddspSvcModelConfig: fileUploadSetting.ddspSvcModelConfig?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModelConfig?.filename : "",
|
|
||||||
ddspSvcDiffusion: fileUploadSetting.ddspSvcDiffusion?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusion?.filename : "",
|
|
||||||
ddspSvcDiffusionConfig: fileUploadSetting.ddspSvcDiffusionConfig?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusionConfig.filename : "",
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
|
|
||||||
if (fileUploadSetting.isHalf == undefined) {
|
|
||||||
fileUploadSetting.isHalf = false
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log("PARAMS:", params)
|
console.log("PARAMS:", params)
|
||||||
|
const voiceChangerType = VoiceChangerType.RVC
|
||||||
|
|
||||||
const loadPromise = props.voiceChangerClient.loadModel(
|
const loadPromise = props.voiceChangerClient.loadModel(
|
||||||
slot,
|
slot,
|
||||||
fileUploadSetting.isHalf,
|
voiceChangerType,
|
||||||
params,
|
params,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -460,12 +446,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
const storeToCache = (slot: number, fileUploadSetting: FileUploadSetting) => {
|
const storeToCache = (slot: number, fileUploadSetting: FileUploadSetting) => {
|
||||||
try {
|
try {
|
||||||
const saveData: FileUploadSetting = {
|
const saveData: FileUploadSetting = {
|
||||||
isHalf: fileUploadSetting.isHalf, // キャッシュとしては不使用。guiで上書きされる。
|
|
||||||
uploaded: false, // キャッシュから読み込まれるときには、まだuploadされていないから。
|
uploaded: false, // キャッシュから読み込まれるときには、まだuploadされていないから。
|
||||||
defaultTune: fileUploadSetting.defaultTune,
|
|
||||||
defaultIndexRatio: fileUploadSetting.defaultIndexRatio,
|
|
||||||
defaultProtect: fileUploadSetting.defaultProtect,
|
|
||||||
framework: fileUploadSetting.framework,
|
|
||||||
params: fileUploadSetting.params,
|
params: fileUploadSetting.params,
|
||||||
|
|
||||||
mmvcv13Config: fileUploadSetting.mmvcv13Config ? { data: fileUploadSetting.mmvcv13Config.data, filename: fileUploadSetting.mmvcv13Config.filename } : null,
|
mmvcv13Config: fileUploadSetting.mmvcv13Config ? { data: fileUploadSetting.mmvcv13Config.data, filename: fileUploadSetting.mmvcv13Config.filename } : null,
|
||||||
|
3
server/.vscode/settings.json
vendored
3
server/.vscode/settings.json
vendored
@ -8,9 +8,10 @@
|
|||||||
"editor.defaultFormatter": null, // Prettier を使わないようにする
|
"editor.defaultFormatter": null, // Prettier を使わないようにする
|
||||||
"editor.formatOnSave": true // ファイル保存時に自動フォーマット
|
"editor.formatOnSave": true // ファイル保存時に自動フォーマット
|
||||||
},
|
},
|
||||||
|
"python.formatting.blackArgs": ["--line-length", "550"],
|
||||||
"flake8.args": [
|
"flake8.args": [
|
||||||
"--ignore=E501,E402,E722,E741,E203,W503"
|
"--ignore=E501,E402,E722,E741,E203,W503"
|
||||||
// "--max-line-length=150",
|
// "--max-line-length=150"
|
||||||
// "--max-complexity=20"
|
// "--max-complexity=20"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -3,9 +3,7 @@ class NoModeLoadedException(Exception):
|
|||||||
self.framework = framework
|
self.framework = framework
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr(
|
return repr(f"No model for {self.framework} loaded. Please confirm the model uploaded.")
|
||||||
f"No model for {self.framework} loaded. Please confirm the model uploaded."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class HalfPrecisionChangingException(Exception):
|
class HalfPrecisionChangingException(Exception):
|
||||||
@ -36,3 +34,17 @@ class DeviceCannotSupportHalfPrecisionException(Exception):
|
|||||||
class VoiceChangerIsNotSelectedException(Exception):
|
class VoiceChangerIsNotSelectedException(Exception):
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr("Voice Changer is not selected.")
|
return repr("Voice Changer is not selected.")
|
||||||
|
|
||||||
|
|
||||||
|
class SlotConfigNotFoundException(Exception):
|
||||||
|
def __init__(self, modelDir, slotIndex):
|
||||||
|
self.modelDir = modelDir
|
||||||
|
self.slotIndex = slotIndex
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return repr(f"Config for slot {self.slotIndex} is not found. (modelDir:{self.modelDir})")
|
||||||
|
|
||||||
|
|
||||||
|
class WeightDownladException(Exception):
|
||||||
|
def __str__(self):
|
||||||
|
return repr("Failed to download weight.")
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
from concurrent.futures import ThreadPoolExecutor
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
@ -7,24 +6,24 @@ import socket
|
|||||||
import platform
|
import platform
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
from Downloader import download, download_no_tqdm
|
|
||||||
from voice_changer.RVC.SampleDownloader import (
|
from Exceptions import WeightDownladException
|
||||||
checkRvcModelExist,
|
from utils.downloader.SampleDownloader import downloadInitialSamples
|
||||||
downloadInitialSampleModels,
|
from utils.downloader.WeightDownloader import downloadWeight
|
||||||
)
|
|
||||||
|
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
|
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from mods.ssl import create_self_signed_cert
|
from mods.ssl import create_self_signed_cert
|
||||||
|
|
||||||
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
||||||
|
|
||||||
from sio.MMVC_SocketIOApp import MMVC_SocketIOApp
|
from sio.MMVC_SocketIOApp import MMVC_SocketIOApp
|
||||||
from restapi.MMVC_Rest import MMVC_Rest
|
from restapi.MMVC_Rest import MMVC_Rest
|
||||||
from const import (
|
from const import (
|
||||||
NATIVE_CLIENT_FILE_MAC,
|
NATIVE_CLIENT_FILE_MAC,
|
||||||
NATIVE_CLIENT_FILE_WIN,
|
NATIVE_CLIENT_FILE_WIN,
|
||||||
SSL_KEY_DIR,
|
SSL_KEY_DIR,
|
||||||
getRVCSampleJsonAndModelIds,
|
|
||||||
)
|
)
|
||||||
import subprocess
|
import subprocess
|
||||||
import multiprocessing as mp
|
import multiprocessing as mp
|
||||||
@ -35,56 +34,23 @@ setup_loggers()
|
|||||||
|
|
||||||
def setupArgParser():
|
def setupArgParser():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument("--logLevel", type=str, default="critical", help="Log level info|critical. (default: critical)")
|
||||||
"--logLevel",
|
|
||||||
type=str,
|
|
||||||
default="critical",
|
|
||||||
help="Log level info|critical. (default: critical)",
|
|
||||||
)
|
|
||||||
parser.add_argument("-p", type=int, default=18888, help="port")
|
parser.add_argument("-p", type=int, default=18888, help="port")
|
||||||
parser.add_argument("--https", type=strtobool, default=False, help="use https")
|
parser.add_argument("--https", type=strtobool, default=False, help="use https")
|
||||||
parser.add_argument(
|
parser.add_argument("--httpsKey", type=str, default="ssl.key", help="path for the key of https")
|
||||||
"--httpsKey", type=str, default="ssl.key", help="path for the key of https"
|
parser.add_argument("--httpsCert", type=str, default="ssl.cert", help="path for the cert of https")
|
||||||
)
|
parser.add_argument("--httpsSelfSigned", type=strtobool, default=True, help="generate self-signed certificate")
|
||||||
parser.add_argument(
|
|
||||||
"--httpsCert", type=str, default="ssl.cert", help="path for the cert of https"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--httpsSelfSigned",
|
|
||||||
type=strtobool,
|
|
||||||
default=True,
|
|
||||||
help="generate self-signed certificate",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument("--model_dir", type=str, help="path to model files")
|
parser.add_argument("--model_dir", type=str, help="path to model files")
|
||||||
parser.add_argument(
|
parser.add_argument("--sample_mode", type=str, default="production", help="sample_mode")
|
||||||
"--rvc_sample_mode", type=str, default="production", help="rvc_sample_mode"
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument("--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)")
|
||||||
"--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)"
|
parser.add_argument("--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)")
|
||||||
)
|
parser.add_argument("--content_vec_500_onnx_on", type=strtobool, default=False, help="use or not onnx for content_vec_500")
|
||||||
parser.add_argument(
|
parser.add_argument("--hubert_base", type=str, help="path to hubert_base model(pytorch)")
|
||||||
"--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)"
|
parser.add_argument("--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)")
|
||||||
)
|
parser.add_argument("--hubert_soft", type=str, help="path to hubert_soft model(pytorch)")
|
||||||
parser.add_argument(
|
parser.add_argument("--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)")
|
||||||
"--content_vec_500_onnx_on",
|
|
||||||
type=strtobool,
|
|
||||||
default=False,
|
|
||||||
help="use or not onnx for content_vec_500",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--hubert_base", type=str, help="path to hubert_base model(pytorch)"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--hubert_soft", type=str, help="path to hubert_soft model(pytorch)"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)"
|
|
||||||
)
|
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
@ -111,76 +77,19 @@ def printMessage(message, level=0):
|
|||||||
print(f"\033[47m {message}\033[0m")
|
print(f"\033[47m {message}\033[0m")
|
||||||
|
|
||||||
|
|
||||||
def downloadWeight():
|
|
||||||
# content_vec_500 = (args.content_vec_500,)
|
|
||||||
# content_vec_500_onnx = (args.content_vec_500_onnx,)
|
|
||||||
# content_vec_500_onnx_on = (args.content_vec_500_onnx_on,)
|
|
||||||
hubert_base = args.hubert_base
|
|
||||||
hubert_base_jp = args.hubert_base_jp
|
|
||||||
hubert_soft = args.hubert_soft
|
|
||||||
nsf_hifigan = args.nsf_hifigan
|
|
||||||
|
|
||||||
# file exists check (currently only for rvc)
|
|
||||||
downloadParams = []
|
|
||||||
if os.path.exists(hubert_base) is False:
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": "https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt",
|
|
||||||
"saveTo": hubert_base,
|
|
||||||
"position": 0,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if os.path.exists(hubert_base_jp) is False:
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": "https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt",
|
|
||||||
"saveTo": hubert_base_jp,
|
|
||||||
"position": 1,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if os.path.exists(hubert_soft) is False:
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt",
|
|
||||||
"saveTo": hubert_soft,
|
|
||||||
"position": 2,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if os.path.exists(nsf_hifigan) is False:
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin",
|
|
||||||
"saveTo": nsf_hifigan,
|
|
||||||
"position": 3,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
nsf_hifigan_config = os.path.join(os.path.dirname(nsf_hifigan), "config.json")
|
|
||||||
|
|
||||||
if os.path.exists(nsf_hifigan_config) is False:
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": "https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json",
|
|
||||||
"saveTo": nsf_hifigan_config,
|
|
||||||
"position": 4,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
with ThreadPoolExecutor() as pool:
|
|
||||||
pool.map(download, downloadParams)
|
|
||||||
|
|
||||||
if (
|
|
||||||
os.path.exists(hubert_base) is False
|
|
||||||
or os.path.exists(hubert_base_jp) is False
|
|
||||||
or os.path.exists(hubert_soft) is False
|
|
||||||
or os.path.exists(nsf_hifigan) is False
|
|
||||||
or os.path.exists(nsf_hifigan_config) is False
|
|
||||||
):
|
|
||||||
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
|
|
||||||
printMessage("failed to download weight for rvc", level=2)
|
|
||||||
|
|
||||||
|
|
||||||
parser = setupArgParser()
|
parser = setupArgParser()
|
||||||
args, unknown = parser.parse_known_args()
|
args, unknown = parser.parse_known_args()
|
||||||
|
voiceChangerParams = VoiceChangerParams(
|
||||||
|
model_dir=args.model_dir,
|
||||||
|
content_vec_500=args.content_vec_500,
|
||||||
|
content_vec_500_onnx=args.content_vec_500_onnx,
|
||||||
|
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
|
||||||
|
hubert_base=args.hubert_base,
|
||||||
|
hubert_base_jp=args.hubert_base_jp,
|
||||||
|
hubert_soft=args.hubert_soft,
|
||||||
|
nsf_hifigan=args.nsf_hifigan,
|
||||||
|
sample_mode=args.sample_mode,
|
||||||
|
)
|
||||||
|
|
||||||
printMessage(f"Booting PHASE :{__name__}", level=2)
|
printMessage(f"Booting PHASE :{__name__}", level=2)
|
||||||
|
|
||||||
@ -199,24 +108,6 @@ def localServer(logLevel: str = "critical"):
|
|||||||
|
|
||||||
if __name__ == "MMVCServerSIO":
|
if __name__ == "MMVCServerSIO":
|
||||||
mp.freeze_support()
|
mp.freeze_support()
|
||||||
voiceChangerParams = VoiceChangerParams(
|
|
||||||
model_dir=args.model_dir,
|
|
||||||
content_vec_500=args.content_vec_500,
|
|
||||||
content_vec_500_onnx=args.content_vec_500_onnx,
|
|
||||||
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
|
|
||||||
hubert_base=args.hubert_base,
|
|
||||||
hubert_base_jp=args.hubert_base_jp,
|
|
||||||
hubert_soft=args.hubert_soft,
|
|
||||||
nsf_hifigan=args.nsf_hifigan,
|
|
||||||
rvc_sample_mode=args.rvc_sample_mode,
|
|
||||||
)
|
|
||||||
|
|
||||||
if (
|
|
||||||
os.path.exists(voiceChangerParams.hubert_base) is False
|
|
||||||
or os.path.exists(voiceChangerParams.hubert_base_jp) is False
|
|
||||||
):
|
|
||||||
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
|
|
||||||
printMessage("failed to download weight for rvc", level=2)
|
|
||||||
|
|
||||||
voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
|
voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
|
||||||
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams)
|
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams)
|
||||||
@ -230,20 +121,16 @@ if __name__ == "__main__":
|
|||||||
mp.freeze_support()
|
mp.freeze_support()
|
||||||
|
|
||||||
printMessage("Voice Changerを起動しています。", level=2)
|
printMessage("Voice Changerを起動しています。", level=2)
|
||||||
|
# ダウンロード(Weight)
|
||||||
# ダウンロード
|
|
||||||
downloadWeight()
|
|
||||||
os.makedirs(args.model_dir, exist_ok=True)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sampleJsons = []
|
downloadWeight(voiceChangerParams)
|
||||||
sampleJsonUrls, sampleModels = getRVCSampleJsonAndModelIds(args.rvc_sample_mode)
|
except WeightDownladException:
|
||||||
for url in sampleJsonUrls:
|
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
|
||||||
filename = os.path.basename(url)
|
printMessage("failed to download weight for rvc", level=2)
|
||||||
download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
|
|
||||||
sampleJsons.append(filename)
|
# ダウンロード(Sample)
|
||||||
if checkRvcModelExist(args.model_dir) is False:
|
try:
|
||||||
downloadInitialSampleModels(sampleJsons, sampleModels, args.model_dir)
|
downloadInitialSamples(args.sample_mode, args.model_dir)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("[Voice Changer] loading sample failed", e)
|
print("[Voice Changer] loading sample failed", e)
|
||||||
|
|
||||||
@ -280,9 +167,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
key_path = os.path.join(SSL_KEY_DIR, keyname)
|
key_path = os.path.join(SSL_KEY_DIR, keyname)
|
||||||
cert_path = os.path.join(SSL_KEY_DIR, certname)
|
cert_path = os.path.join(SSL_KEY_DIR, certname)
|
||||||
printMessage(
|
printMessage(f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
|
||||||
f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1
|
|
||||||
)
|
|
||||||
|
|
||||||
elif args.https and args.httpsSelfSigned == 0:
|
elif args.https and args.httpsSelfSigned == 0:
|
||||||
# HTTPS
|
# HTTPS
|
||||||
@ -336,16 +221,12 @@ if __name__ == "__main__":
|
|||||||
p.start()
|
p.start()
|
||||||
try:
|
try:
|
||||||
if sys.platform.startswith("win"):
|
if sys.platform.startswith("win"):
|
||||||
process = subprocess.Popen(
|
process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"])
|
||||||
[NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"]
|
|
||||||
)
|
|
||||||
return_code = process.wait()
|
return_code = process.wait()
|
||||||
print("client closed.")
|
print("client closed.")
|
||||||
p.terminate()
|
p.terminate()
|
||||||
elif sys.platform.startswith("darwin"):
|
elif sys.platform.startswith("darwin"):
|
||||||
process = subprocess.Popen(
|
process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"])
|
||||||
[NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"]
|
|
||||||
)
|
|
||||||
return_code = process.wait()
|
return_code = process.wait()
|
||||||
print("client closed.")
|
print("client closed.")
|
||||||
p.terminate()
|
p.terminate()
|
||||||
|
@ -1,44 +0,0 @@
|
|||||||
from dataclasses import dataclass, field
|
|
||||||
import json
|
|
||||||
|
|
||||||
from const import ModelType
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class RVCModelSample:
|
|
||||||
id: str = ""
|
|
||||||
lang: str = ""
|
|
||||||
tag: list[str] = field(default_factory=lambda: [])
|
|
||||||
name: str = ""
|
|
||||||
modelUrl: str = ""
|
|
||||||
indexUrl: str = ""
|
|
||||||
termsOfUseUrl: str = ""
|
|
||||||
icon: str = ""
|
|
||||||
credit: str = ""
|
|
||||||
description: str = ""
|
|
||||||
|
|
||||||
sampleRate: int = 48000
|
|
||||||
modelType: str = ""
|
|
||||||
f0: bool = True
|
|
||||||
|
|
||||||
|
|
||||||
def getModelSamples(jsonFiles: list[str], modelType: ModelType):
|
|
||||||
try:
|
|
||||||
samples: list[RVCModelSample] = []
|
|
||||||
for file in jsonFiles:
|
|
||||||
with open(file, "r", encoding="utf-8") as f:
|
|
||||||
jsonDict = json.load(f)
|
|
||||||
|
|
||||||
modelList = jsonDict[modelType]
|
|
||||||
if modelType == "RVC":
|
|
||||||
for s in modelList:
|
|
||||||
modelSample = RVCModelSample(**s)
|
|
||||||
samples.append(modelSample)
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise RuntimeError(f"Unknown model type {modelType}")
|
|
||||||
return samples
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print("[Voice Changer] loading sample info error:", e)
|
|
||||||
return None
|
|
137
server/const.py
137
server/const.py
@ -5,33 +5,21 @@ import tempfile
|
|||||||
from typing import Literal, TypeAlias
|
from typing import Literal, TypeAlias
|
||||||
|
|
||||||
|
|
||||||
ModelType: TypeAlias = Literal[
|
VoiceChangerType: TypeAlias = Literal[
|
||||||
"MMVCv15",
|
|
||||||
"MMVCv13",
|
"MMVCv13",
|
||||||
"so-vits-svc-40v2",
|
"MMVCv15",
|
||||||
"so-vits-svc-40",
|
"so-vits-svc-40",
|
||||||
"so-vits-svc-40_c",
|
|
||||||
"DDSP-SVC",
|
"DDSP-SVC",
|
||||||
"RVC",
|
"RVC",
|
||||||
]
|
]
|
||||||
|
|
||||||
ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION"
|
ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION"
|
||||||
|
|
||||||
|
|
||||||
tmpdir = tempfile.TemporaryDirectory()
|
tmpdir = tempfile.TemporaryDirectory()
|
||||||
# print("generate tmpdir:::",tmpdir)
|
|
||||||
SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
|
SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
|
||||||
MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
|
MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
|
||||||
UPLOAD_DIR = (
|
UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
|
||||||
os.path.join(tmpdir.name, "upload_dir")
|
NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client" # type: ignore
|
||||||
if hasattr(sys, "_MEIPASS")
|
|
||||||
else "upload_dir"
|
|
||||||
)
|
|
||||||
NATIVE_CLIENT_FILE_WIN = (
|
|
||||||
os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") # type: ignore
|
|
||||||
if hasattr(sys, "_MEIPASS")
|
|
||||||
else "voice-changer-native-client"
|
|
||||||
)
|
|
||||||
NATIVE_CLIENT_FILE_MAC = (
|
NATIVE_CLIENT_FILE_MAC = (
|
||||||
os.path.join(
|
os.path.join(
|
||||||
sys._MEIPASS, # type: ignore
|
sys._MEIPASS, # type: ignore
|
||||||
@ -44,25 +32,12 @@ NATIVE_CLIENT_FILE_MAC = (
|
|||||||
else "voice-changer-native-client"
|
else "voice-changer-native-client"
|
||||||
)
|
)
|
||||||
|
|
||||||
HUBERT_ONNX_MODEL_PATH = (
|
TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
|
||||||
os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") # type: ignore
|
|
||||||
if hasattr(sys, "_MEIPASS")
|
|
||||||
else "model_hubert/hubert_simple.onnx"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
TMP_DIR = (
|
|
||||||
os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
|
|
||||||
)
|
|
||||||
os.makedirs(TMP_DIR, exist_ok=True)
|
os.makedirs(TMP_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def getFrontendPath():
|
def getFrontendPath():
|
||||||
frontend_path = (
|
frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
|
||||||
os.path.join(sys._MEIPASS, "dist")
|
|
||||||
if hasattr(sys, "_MEIPASS")
|
|
||||||
else "../client/demo/dist"
|
|
||||||
)
|
|
||||||
return frontend_path
|
return frontend_path
|
||||||
|
|
||||||
|
|
||||||
@ -100,84 +75,84 @@ class ServerAudioDeviceTypes(Enum):
|
|||||||
audiooutput = "audiooutput"
|
audiooutput = "audiooutput"
|
||||||
|
|
||||||
|
|
||||||
class RVCSampleMode(Enum):
|
RVCSampleMode: TypeAlias = Literal[
|
||||||
production = "production"
|
"production",
|
||||||
testOfficial = "testOfficial"
|
"testOfficial",
|
||||||
testDDPNTorch = "testDDPNTorch"
|
"testDDPNTorch",
|
||||||
testDDPNONNX = "testDDPNONNX"
|
"testDDPNONNX",
|
||||||
testONNXFull = "testONNXFull"
|
"testONNXFull",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def getRVCSampleJsonAndModelIds(mode: RVCSampleMode):
|
def getSampleJsonAndModelIds(mode: RVCSampleMode):
|
||||||
if mode == RVCSampleMode.production.value:
|
if mode == "production":
|
||||||
return [
|
return [
|
||||||
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json",
|
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json",
|
||||||
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json",
|
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json",
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
|
||||||
], [
|
], [
|
||||||
("TokinaShigure_o", True),
|
("TokinaShigure_o", {"useIndex": True}),
|
||||||
("KikotoMahiro_o", False),
|
("KikotoMahiro_o", {"useIndex": False}),
|
||||||
("Amitaro_o", False),
|
("Amitaro_o", {"useIndex": False}),
|
||||||
("Tsukuyomi-chan_o", False),
|
("Tsukuyomi-chan_o", {"useIndex": False}),
|
||||||
]
|
]
|
||||||
elif mode == RVCSampleMode.testOfficial.value:
|
elif mode == "testOfficial":
|
||||||
return [
|
return [
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||||
], [
|
], [
|
||||||
("test-official-v1-f0-48k-l9-hubert_t", True),
|
("test-official-v1-f0-48k-l9-hubert_t", {"useIndex": True}),
|
||||||
("test-official-v1-nof0-48k-l9-hubert_t", False),
|
("test-official-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
|
||||||
("test-official-v2-f0-40k-l12-hubert_t", False),
|
("test-official-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
|
||||||
("test-official-v2-nof0-40k-l12-hubert_t", False),
|
("test-official-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
|
||||||
("test-official-v1-f0-48k-l9-hubert_o", True),
|
("test-official-v1-f0-48k-l9-hubert_o", {"useIndex": True}),
|
||||||
("test-official-v1-nof0-48k-l9-hubert_o", False),
|
("test-official-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
|
||||||
("test-official-v2-f0-40k-l12-hubert_o", False),
|
("test-official-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
|
||||||
("test-official-v2-nof0-40k-l12-hubert_o", False),
|
("test-official-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
|
||||||
]
|
]
|
||||||
elif mode == RVCSampleMode.testDDPNTorch.value:
|
elif mode == "testDDPNTorch":
|
||||||
return [
|
return [
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||||
], [
|
], [
|
||||||
("test-ddpn-v1-f0-48k-l9-hubert_t", False),
|
("test-ddpn-v1-f0-48k-l9-hubert_t", {"useIndex": False}),
|
||||||
("test-ddpn-v1-nof0-48k-l9-hubert_t", False),
|
("test-ddpn-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
|
||||||
("test-ddpn-v2-f0-40k-l12-hubert_t", False),
|
("test-ddpn-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
|
||||||
("test-ddpn-v2-nof0-40k-l12-hubert_t", False),
|
("test-ddpn-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
|
||||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_t", False),
|
("test-ddpn-v2-f0-40k-l12-hubert_jp_t", {"useIndex": False}),
|
||||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", False),
|
("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", {"useIndex": False}),
|
||||||
]
|
]
|
||||||
elif mode == RVCSampleMode.testDDPNONNX.value:
|
elif mode == "testDDPNONNX":
|
||||||
return [
|
return [
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||||
], [
|
], [
|
||||||
("test-ddpn-v1-f0-48k-l9-hubert_o", False),
|
("test-ddpn-v1-f0-48k-l9-hubert_o", {"useIndex": False}),
|
||||||
("test-ddpn-v1-nof0-48k-l9-hubert_o", False),
|
("test-ddpn-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
|
||||||
("test-ddpn-v2-f0-40k-l12-hubert_o", False),
|
("test-ddpn-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
|
||||||
("test-ddpn-v2-nof0-40k-l12-hubert_o", False),
|
("test-ddpn-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
|
||||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_o", False),
|
("test-ddpn-v2-f0-40k-l12-hubert_jp_o", {"useIndex": False}),
|
||||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", False),
|
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", {"useIndex": False}),
|
||||||
]
|
]
|
||||||
elif mode == RVCSampleMode.testONNXFull.value:
|
elif mode == "testONNXFull":
|
||||||
return [
|
return [
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
|
||||||
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
|
||||||
], [
|
], [
|
||||||
("test-official-v1-f0-48k-l9-hubert_o_full", False),
|
("test-official-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||||
("test-official-v1-nof0-48k-l9-hubert_o_full", False),
|
("test-official-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||||
("test-official-v2-f0-40k-l12-hubert_o_full", False),
|
("test-official-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||||
("test-official-v2-nof0-40k-l12-hubert_o_full", False),
|
("test-official-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||||
("test-ddpn-v1-f0-48k-l9-hubert_o_full", False),
|
("test-ddpn-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||||
("test-ddpn-v1-nof0-48k-l9-hubert_o_full", False),
|
("test-ddpn-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
|
||||||
("test-ddpn-v2-f0-40k-l12-hubert_o_full", False),
|
("test-ddpn-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||||
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", False),
|
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
|
||||||
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", False),
|
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
|
||||||
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", False),
|
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
return [], []
|
return [], []
|
||||||
|
|
||||||
|
|
||||||
RVC_MODEL_DIRNAME = "rvc"
|
MAX_SLOT_NUM = 10
|
||||||
RVC_MAX_SLOT_NUM = 10
|
|
||||||
|
37
server/data/ModelSample.py
Normal file
37
server/data/ModelSample.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import TypeAlias, Union, Any
|
||||||
|
from const import VoiceChangerType
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ModelSample:
|
||||||
|
id: str = ""
|
||||||
|
voiceChangerType: VoiceChangerType | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RVCModelSample(ModelSample):
|
||||||
|
voiceChangerType: VoiceChangerType = "RVC"
|
||||||
|
lang: str = ""
|
||||||
|
tag: list[str] = field(default_factory=lambda: [])
|
||||||
|
name: str = ""
|
||||||
|
modelUrl: str = ""
|
||||||
|
indexUrl: str = ""
|
||||||
|
termsOfUseUrl: str = ""
|
||||||
|
icon: str = ""
|
||||||
|
credit: str = ""
|
||||||
|
description: str = ""
|
||||||
|
|
||||||
|
sampleRate: int = 48000
|
||||||
|
modelType: str = ""
|
||||||
|
f0: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
ModelSamples: TypeAlias = Union[ModelSample, RVCModelSample]
|
||||||
|
|
||||||
|
|
||||||
|
def generateModelSample(params: Any) -> ModelSamples:
|
||||||
|
if params["voiceChangerType"] == "RVC":
|
||||||
|
return RVCModelSample(**params)
|
||||||
|
else:
|
||||||
|
return ModelSample(**{k: v for k, v in params.items() if k in ModelSample.__annotations__})
|
59
server/data/ModelSlot.py
Normal file
59
server/data/ModelSlot.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
from typing import TypeAlias, Union
|
||||||
|
from const import EnumInferenceTypes, EnumEmbedderTypes, VoiceChangerType
|
||||||
|
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ModelSlot:
|
||||||
|
voiceChangerType: VoiceChangerType | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RVCModelSlot(ModelSlot):
|
||||||
|
voiceChangerType: VoiceChangerType = "RVC"
|
||||||
|
modelFile: str = ""
|
||||||
|
indexFile: str = ""
|
||||||
|
defaultTune: int = 0
|
||||||
|
defaultIndexRatio: int = 1
|
||||||
|
defaultProtect: float = 0.5
|
||||||
|
isONNX: bool = False
|
||||||
|
modelType: str = EnumInferenceTypes.pyTorchRVC.value
|
||||||
|
samplingRate: int = -1
|
||||||
|
f0: bool = True
|
||||||
|
embChannels: int = 256
|
||||||
|
embOutputLayer: int = 9
|
||||||
|
useFinalProj: bool = True
|
||||||
|
deprecated: bool = False
|
||||||
|
embedder: str = EnumEmbedderTypes.hubert.value
|
||||||
|
|
||||||
|
name: str = ""
|
||||||
|
description: str = ""
|
||||||
|
credit: str = ""
|
||||||
|
termsOfUseUrl: str = ""
|
||||||
|
sampleId: str = ""
|
||||||
|
iconFile: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
|
||||||
|
|
||||||
|
|
||||||
|
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
|
||||||
|
slotDir = os.path.join(model_dir, str(slotIndex))
|
||||||
|
jsonFile = os.path.join(slotDir, "params.json")
|
||||||
|
if not os.path.exists(jsonFile):
|
||||||
|
return ModelSlot()
|
||||||
|
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
|
||||||
|
slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
|
||||||
|
if slotInfo.voiceChangerType == "RVC":
|
||||||
|
return RVCModelSlot(**jsonDict)
|
||||||
|
else:
|
||||||
|
return ModelSlot()
|
||||||
|
|
||||||
|
|
||||||
|
def saveSlotInfo(model_dir: str, slotIndex: int, slotInfo: ModelSlots):
|
||||||
|
slotDir = os.path.join(model_dir, str(slotIndex))
|
||||||
|
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
@ -69,19 +69,14 @@ class MMVC_Rest:
|
|||||||
StaticFiles(directory=f"{getFrontendPath()}", html=True),
|
StaticFiles(directory=f"{getFrontendPath()}", html=True),
|
||||||
name="static",
|
name="static",
|
||||||
)
|
)
|
||||||
app_fastapi.mount(
|
app_fastapi.mount("/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static")
|
||||||
"/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static"
|
app_fastapi.mount("/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static")
|
||||||
)
|
|
||||||
app_fastapi.mount(
|
|
||||||
"/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static"
|
|
||||||
)
|
|
||||||
|
|
||||||
if sys.platform.startswith("darwin"):
|
if sys.platform.startswith("darwin"):
|
||||||
p1 = os.path.dirname(sys._MEIPASS)
|
p1 = os.path.dirname(sys._MEIPASS)
|
||||||
p2 = os.path.dirname(p1)
|
p2 = os.path.dirname(p1)
|
||||||
p3 = os.path.dirname(p2)
|
p3 = os.path.dirname(p2)
|
||||||
model_dir = os.path.join(p3, voiceChangerParams.model_dir)
|
model_dir = os.path.join(p3, voiceChangerParams.model_dir)
|
||||||
print("mac model_dir:", model_dir)
|
|
||||||
app_fastapi.mount(
|
app_fastapi.mount(
|
||||||
f"/{voiceChangerParams.model_dir}",
|
f"/{voiceChangerParams.model_dir}",
|
||||||
StaticFiles(directory=model_dir),
|
StaticFiles(directory=model_dir),
|
||||||
|
@ -1,6 +1,4 @@
|
|||||||
import json
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
from typing import Union
|
from typing import Union
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
from fastapi.encoders import jsonable_encoder
|
from fastapi.encoders import jsonable_encoder
|
||||||
@ -10,8 +8,7 @@ from fastapi import UploadFile, File, Form
|
|||||||
from restapi.mods.FileUploader import upload_file, concat_file_chunks
|
from restapi.mods.FileUploader import upload_file, concat_file_chunks
|
||||||
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
||||||
|
|
||||||
from const import MODEL_DIR, UPLOAD_DIR, ModelType
|
from const import MODEL_DIR, UPLOAD_DIR, VoiceChangerType
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
|
||||||
|
|
||||||
|
|
||||||
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
||||||
@ -24,123 +21,130 @@ class MMVC_Rest_Fileuploader:
|
|||||||
self.router = APIRouter()
|
self.router = APIRouter()
|
||||||
self.router.add_api_route("/info", self.get_info, methods=["GET"])
|
self.router.add_api_route("/info", self.get_info, methods=["GET"])
|
||||||
self.router.add_api_route("/performance", self.get_performance, methods=["GET"])
|
self.router.add_api_route("/performance", self.get_performance, methods=["GET"])
|
||||||
self.router.add_api_route(
|
self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
|
||||||
"/upload_file", self.post_upload_file, methods=["POST"]
|
self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
|
||||||
)
|
self.router.add_api_route("/update_settings", self.post_update_settings, methods=["POST"])
|
||||||
self.router.add_api_route(
|
|
||||||
"/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"]
|
|
||||||
)
|
|
||||||
self.router.add_api_route(
|
|
||||||
"/update_settings", self.post_update_settings, methods=["POST"]
|
|
||||||
)
|
|
||||||
self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
|
self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
|
||||||
self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
|
self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
|
||||||
self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
|
# self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
|
||||||
self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
|
self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
|
||||||
self.router.add_api_route(
|
self.router.add_api_route("/merge_model", self.post_merge_models, methods=["POST"])
|
||||||
"/merge_model", self.post_merge_models, methods=["POST"]
|
self.router.add_api_route("/update_model_default", self.post_update_model_default, methods=["POST"])
|
||||||
)
|
self.router.add_api_route("/update_model_info", self.post_update_model_info, methods=["POST"])
|
||||||
self.router.add_api_route(
|
self.router.add_api_route("/upload_model_assets", self.post_upload_model_assets, methods=["POST"])
|
||||||
"/update_model_default", self.post_update_model_default, methods=["POST"]
|
|
||||||
)
|
|
||||||
self.router.add_api_route(
|
|
||||||
"/update_model_info", self.post_update_model_info, methods=["POST"]
|
|
||||||
)
|
|
||||||
self.router.add_api_route(
|
|
||||||
"/upload_model_assets", self.post_upload_model_assets, methods=["POST"]
|
|
||||||
)
|
|
||||||
|
|
||||||
def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
|
def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
|
||||||
res = upload_file(UPLOAD_DIR, file, filename)
|
try:
|
||||||
json_compatible_item_data = jsonable_encoder(res)
|
res = upload_file(UPLOAD_DIR, file, filename)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(res)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def post_concat_uploaded_file(
|
def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
|
||||||
self, filename: str = Form(...), filenameChunkNum: int = Form(...)
|
try:
|
||||||
):
|
res = concat_file_chunks(UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
|
||||||
res = concat_file_chunks(UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
|
json_compatible_item_data = jsonable_encoder(res)
|
||||||
json_compatible_item_data = jsonable_encoder(res)
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
info = self.voiceChangerManager.get_info()
|
try:
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
info = self.voiceChangerManager.get_info()
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def get_performance(self):
|
def get_performance(self):
|
||||||
info = self.voiceChangerManager.get_performance()
|
try:
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
info = self.voiceChangerManager.get_performance()
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def post_update_settings(
|
def post_update_settings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
|
||||||
self, key: str = Form(...), val: Union[int, str, float] = Form(...)
|
try:
|
||||||
):
|
print("[Voice Changer] update configuration:", key, val)
|
||||||
print("[Voice Changer] update configuration:", key, val)
|
info = self.voiceChangerManager.update_settings(key, val)
|
||||||
info = self.voiceChangerManager.update_settings(key, val)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def post_load_model(
|
def post_load_model(
|
||||||
self,
|
self,
|
||||||
slot: int = Form(...),
|
slot: int = Form(...),
|
||||||
isHalf: bool = Form(...),
|
voiceChangerType: str = Form(...),
|
||||||
params: str = Form(...),
|
params: str = Form(...),
|
||||||
):
|
):
|
||||||
paramDict = json.loads(params)
|
try:
|
||||||
# print("paramDict", paramDict)
|
info = self.voiceChangerManager.loadModel(slot, voiceChangerType, params)
|
||||||
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
# Change Filepath
|
def post_model_type(self, modelType: VoiceChangerType = Form(...)):
|
||||||
newFilesDict = {}
|
try:
|
||||||
for key, val in paramDict["files"].items():
|
# info = self.voiceChangerManager.switchModelType(modelType)
|
||||||
if val != "-" and val != "":
|
# json_compatible_item_data = jsonable_encoder(info)
|
||||||
uploadPath = os.path.join(UPLOAD_DIR, val)
|
json_compatible_item_data = jsonable_encoder({"status": "ok"})
|
||||||
storePath = os.path.join(UPLOAD_DIR, f"{slot}", val)
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
storeDir = os.path.dirname(storePath)
|
except Exception as e:
|
||||||
os.makedirs(storeDir, exist_ok=True)
|
print("[Voice Changer] ", e)
|
||||||
shutil.move(uploadPath, storePath)
|
|
||||||
newFilesDict[key] = storePath
|
|
||||||
paramDict["files"] = newFilesDict
|
|
||||||
|
|
||||||
props: LoadModelParams = LoadModelParams(
|
# def get_model_type(self):
|
||||||
slot=slot, isHalf=isHalf, params=paramDict
|
# try:
|
||||||
)
|
# # info = self.voiceChangerManager.getModelType()
|
||||||
|
# # json_compatible_item_data = jsonable_encoder(info)
|
||||||
info = self.voiceChangerManager.loadModel(props)
|
# print(
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
# "-------------- get_model_type",
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
# )
|
||||||
|
# json_compatible_item_data = jsonable_encoder({"status": "ok"})
|
||||||
def post_model_type(self, modelType: ModelType = Form(...)):
|
# return JSONResponse(content=json_compatible_item_data)
|
||||||
info = self.voiceChangerManager.switchModelType(modelType)
|
# except Exception as e:
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
# print("[Voice Changer] ", e)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
||||||
|
|
||||||
def get_model_type(self):
|
|
||||||
info = self.voiceChangerManager.getModelType()
|
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
||||||
|
|
||||||
def get_onnx(self):
|
def get_onnx(self):
|
||||||
info = self.voiceChangerManager.export2onnx()
|
try:
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
info = self.voiceChangerManager.export2onnx()
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def post_merge_models(self, request: str = Form(...)):
|
def post_merge_models(self, request: str = Form(...)):
|
||||||
print(request)
|
try:
|
||||||
info = self.voiceChangerManager.merge_models(request)
|
print(request)
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
info = self.voiceChangerManager.merge_models(request)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def post_update_model_default(self):
|
def post_update_model_default(self):
|
||||||
info = self.voiceChangerManager.update_model_default()
|
try:
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
info = self.voiceChangerManager.update_model_default()
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def post_update_model_info(self, newData: str = Form(...)):
|
def post_update_model_info(self, newData: str = Form(...)):
|
||||||
info = self.voiceChangerManager.update_model_info(newData)
|
try:
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
info = self.voiceChangerManager.update_model_info(newData)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
|
||||||
def post_upload_model_assets(self, params: str = Form(...)):
|
def post_upload_model_assets(self, params: str = Form(...)):
|
||||||
info = self.voiceChangerManager.upload_model_assets(params)
|
try:
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
info = self.voiceChangerManager.upload_model_assets(params)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
except Exception as e:
|
||||||
|
print("[Voice Changer] ", e)
|
||||||
|
@ -1,14 +0,0 @@
|
|||||||
|
|
||||||
from fastapi.responses import FileResponse
|
|
||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
def mod_get_model(modelFile: str):
|
|
||||||
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
|
|
||||||
return FileResponse(path=modelPath)
|
|
||||||
|
|
||||||
|
|
||||||
def mod_delete_model(modelFile: str):
|
|
||||||
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
|
|
||||||
os.remove(modelPath)
|
|
||||||
return {"Model deleted": f"{modelFile}"}
|
|
@ -1,23 +0,0 @@
|
|||||||
|
|
||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
from restapi.utils.files import get_file_list
|
|
||||||
import os
|
|
||||||
|
|
||||||
def mod_get_models():
|
|
||||||
gModels = get_file_list(f'MMVC_Trainer/logs/G*.pth')
|
|
||||||
dModels = get_file_list(f'MMVC_Trainer/logs/D*.pth')
|
|
||||||
configs = get_file_list(f'MMVC_Trainer/logs/config.json')
|
|
||||||
models = []
|
|
||||||
models.extend(gModels)
|
|
||||||
models.extend(dModels)
|
|
||||||
models.extend(configs)
|
|
||||||
models = [ os.path.basename(x) for x in models]
|
|
||||||
|
|
||||||
models = sorted(models)
|
|
||||||
data = {
|
|
||||||
"models":models
|
|
||||||
}
|
|
||||||
json_compatible_item_data = jsonable_encoder(data)
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
import os
|
|
||||||
|
|
||||||
MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt"
|
|
||||||
def mod_get_multi_speaker_setting():
|
|
||||||
data = {}
|
|
||||||
if os.path.isfile(MULTI_SPEAKER_SETTING_PATH) == False:
|
|
||||||
with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
|
|
||||||
f.write("")
|
|
||||||
f.flush()
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
|
|
||||||
setting = f.read()
|
|
||||||
data["multi_speaker_setting"] = setting
|
|
||||||
json_compatible_item_data = jsonable_encoder(data)
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
||||||
|
|
||||||
|
|
||||||
def mod_post_multi_speaker_setting(setting:str):
|
|
||||||
with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
|
|
||||||
f.write(setting)
|
|
||||||
f.flush()
|
|
||||||
f.close()
|
|
||||||
return {"Write Multispeaker setting": f"{setting}"}
|
|
@ -1,15 +0,0 @@
|
|||||||
import shutil
|
|
||||||
from restapi.mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH
|
|
||||||
|
|
||||||
def mod_delete_speaker(speaker:str):
|
|
||||||
shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}")
|
|
||||||
|
|
||||||
with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
|
|
||||||
setting = f.readlines()
|
|
||||||
|
|
||||||
filtered = filter(lambda x: x.startswith(f"{speaker}|")==False, setting)
|
|
||||||
with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
|
|
||||||
f.writelines(list(filtered))
|
|
||||||
f.flush()
|
|
||||||
f.close()
|
|
||||||
return {"Speaker deleted": f"{speaker}"}
|
|
@ -1,28 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
import os, base64
|
|
||||||
|
|
||||||
def mod_get_speaker_voice(speaker:str, voice:str):
|
|
||||||
wav_file = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav'
|
|
||||||
text_file = f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt'
|
|
||||||
readable_text_file = f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt'
|
|
||||||
|
|
||||||
data = {}
|
|
||||||
if os.path.exists(wav_file):
|
|
||||||
with open(wav_file, "rb") as f:
|
|
||||||
wav_data = f.read()
|
|
||||||
wav_data_base64 = base64.b64encode(wav_data).decode('utf-8')
|
|
||||||
data["wav"] = wav_data_base64
|
|
||||||
|
|
||||||
|
|
||||||
if os.path.exists(text_file):
|
|
||||||
with open(text_file, "r") as f:
|
|
||||||
text_data = f.read()
|
|
||||||
data["text"] = text_data
|
|
||||||
|
|
||||||
if os.path.exists(readable_text_file):
|
|
||||||
with open(readable_text_file, "r") as f:
|
|
||||||
text_data = f.read()
|
|
||||||
data["readable_text"] = text_data
|
|
||||||
json_compatible_item_data = jsonable_encoder(data)
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
@ -1,22 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
from restapi.utils.files import get_file_list
|
|
||||||
import os
|
|
||||||
|
|
||||||
def mod_get_speaker_voices(speaker:str):
|
|
||||||
voices = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/wav/*.wav')
|
|
||||||
|
|
||||||
texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/text/*.txt')
|
|
||||||
|
|
||||||
readable_texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/*.txt')
|
|
||||||
|
|
||||||
items = voices
|
|
||||||
items.extend(texts)
|
|
||||||
items.extend(readable_texts)
|
|
||||||
items = [ os.path.splitext(os.path.basename(x))[0] for x in items]
|
|
||||||
items = sorted(set(items))
|
|
||||||
data = {
|
|
||||||
"voices":items
|
|
||||||
}
|
|
||||||
json_compatible_item_data = jsonable_encoder(data)
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
@ -1,15 +0,0 @@
|
|||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
from restapi.utils.files import get_dir_list
|
|
||||||
import os
|
|
||||||
# CreateはFileUploaderで実装。
|
|
||||||
|
|
||||||
def mod_get_speakers():
|
|
||||||
os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True)
|
|
||||||
speakers = get_dir_list("MMVC_Trainer/dataset/textful/")
|
|
||||||
|
|
||||||
data = {
|
|
||||||
"speakers":sorted(speakers)
|
|
||||||
}
|
|
||||||
json_compatible_item_data = jsonable_encoder(data)
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
@ -1,176 +0,0 @@
|
|||||||
import subprocess,os
|
|
||||||
from restapi.utils.files import get_file_list
|
|
||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.encoders import jsonable_encoder
|
|
||||||
|
|
||||||
LOG_DIR = "info"
|
|
||||||
train_proc = None
|
|
||||||
|
|
||||||
SUCCESS = 0
|
|
||||||
ERROR = -1
|
|
||||||
### Submodule for Pre train
|
|
||||||
def sync_exec(cmd:str, log_path:str, cwd=None):
|
|
||||||
shortCmdStr = cmd[:20]
|
|
||||||
try:
|
|
||||||
with open(log_path, 'w') as log_file:
|
|
||||||
if cwd == None:
|
|
||||||
proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file)
|
|
||||||
else:
|
|
||||||
proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd=cwd)
|
|
||||||
print(f"{shortCmdStr} returncode:{proc.returncode}")
|
|
||||||
if proc.returncode != 0:
|
|
||||||
print(f"{shortCmdStr} exception:")
|
|
||||||
return (ERROR, f"returncode:{proc.returncode}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"{shortCmdStr} exception:", str(e))
|
|
||||||
return (ERROR, str(e))
|
|
||||||
return (SUCCESS, "success")
|
|
||||||
|
|
||||||
def sync_exec_with_stdout(cmd:str, log_path:str):
|
|
||||||
shortCmdStr = cmd[:20]
|
|
||||||
try:
|
|
||||||
with open(log_path, 'w') as log_file:
|
|
||||||
proc = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE,
|
|
||||||
stderr=log_file, cwd="MMVC_Trainer")
|
|
||||||
print(f"STDOUT{shortCmdStr}",proc.stdout)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"{shortCmdStr} exception:", str(e))
|
|
||||||
return (ERROR, str(e))
|
|
||||||
return (SUCCESS, proc.stdout)
|
|
||||||
|
|
||||||
|
|
||||||
def create_dataset():
|
|
||||||
cmd = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt"
|
|
||||||
log_file = os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt")
|
|
||||||
res = sync_exec(cmd, log_file, "MMVC_Trainer")
|
|
||||||
return res
|
|
||||||
|
|
||||||
def set_batch_size(batch:int):
|
|
||||||
cmd = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json"
|
|
||||||
log_file = os.path.join(LOG_DIR, "log_set_batch_size.txt")
|
|
||||||
res = sync_exec(cmd, log_file)
|
|
||||||
return res
|
|
||||||
|
|
||||||
def set_dummy_device_count():
|
|
||||||
cmd = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py'
|
|
||||||
log_file = os.path.join(LOG_DIR, "log_set_dummy_device_count.txt")
|
|
||||||
res = sync_exec(cmd, log_file)
|
|
||||||
return res
|
|
||||||
|
|
||||||
### Submodule for Train
|
|
||||||
def exec_training(enable_finetuning:bool, GModel:str, DModel:str):
|
|
||||||
global train_proc
|
|
||||||
log_file = os.path.join(LOG_DIR, "training.txt")
|
|
||||||
|
|
||||||
# トレーニング開始確認(二重起動回避)
|
|
||||||
if train_proc != None:
|
|
||||||
status = train_proc.poll()
|
|
||||||
if status != None:
|
|
||||||
print("Training have ended.", status)
|
|
||||||
train_proc = None
|
|
||||||
else:
|
|
||||||
print("Training have stated.")
|
|
||||||
return (ERROR, "Training have started")
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(log_file, 'w') as log_file:
|
|
||||||
if enable_finetuning == True:
|
|
||||||
GModelPath = os.path.join("logs", GModel) # 実行時にcwdを指定しているのでフォルダはlogsでよい。
|
|
||||||
DModelPath = os.path.join("logs", DModel)
|
|
||||||
cmd = f'python3 train_ms.py -c configs/train_config.json -m ./ -fg {GModelPath} -fd {DModelPath}'
|
|
||||||
else:
|
|
||||||
cmd = 'python3 train_ms.py -c configs/train_config.json -m ./'
|
|
||||||
print("exec:",cmd)
|
|
||||||
train_proc = subprocess.Popen("exec "+cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer")
|
|
||||||
print("Training stated")
|
|
||||||
print(f"returncode:{train_proc.returncode}")
|
|
||||||
except Exception as e:
|
|
||||||
print("start training exception:", str(e))
|
|
||||||
return (ERROR, str(e))
|
|
||||||
|
|
||||||
return (SUCCESS, "success")
|
|
||||||
|
|
||||||
def stop_training():
|
|
||||||
global train_proc
|
|
||||||
if train_proc == None:
|
|
||||||
print("Training have not stated.")
|
|
||||||
return (ERROR, "Training have not stated.")
|
|
||||||
|
|
||||||
status = train_proc.poll()
|
|
||||||
if status != None:
|
|
||||||
print("Training have already ended.", status)
|
|
||||||
train_proc = None
|
|
||||||
return (ERROR, "Training have already ended. " + status)
|
|
||||||
else:
|
|
||||||
train_proc.kill()
|
|
||||||
print("Training have stoped.")
|
|
||||||
return (SUCCESS, "success")
|
|
||||||
|
|
||||||
### Main
|
|
||||||
def mod_post_pre_training(batch:int):
|
|
||||||
res = set_batch_size(batch)
|
|
||||||
if res[0] == ERROR:
|
|
||||||
return {"result":"failed", "detail": f"Preprocess(set_batch_size) failed. {res[1]}"}
|
|
||||||
|
|
||||||
res = set_dummy_device_count()
|
|
||||||
if res[0] == ERROR:
|
|
||||||
return {"result":"failed", "detail": f"Preprocess(set_dummy_device_count) failed. {res[1]}"}
|
|
||||||
|
|
||||||
res = create_dataset()
|
|
||||||
if res[0] == ERROR:
|
|
||||||
return {"result":"failed", "detail": f"Preprocess failed(create_dataset). {res[1]}"}
|
|
||||||
|
|
||||||
return {"result":"success", "detail": f"Preprocess succeeded. {res[1]}"}
|
|
||||||
|
|
||||||
|
|
||||||
def mod_post_start_training(enable_finetuning:str, GModel:str, DModel:str):
|
|
||||||
print("START_TRAINING:::::::", enable_finetuning, GModel, DModel)
|
|
||||||
res = exec_training(enable_finetuning, GModel, DModel)
|
|
||||||
if res[0] == ERROR:
|
|
||||||
return {"result":"failed", "detail": f"Start training failed. {res[1]}"}
|
|
||||||
|
|
||||||
return {"result":"success", "detail": f"Start training succeeded. {res[1]}"}
|
|
||||||
|
|
||||||
def mod_post_stop_training():
|
|
||||||
res = stop_training()
|
|
||||||
if res[0] == ERROR:
|
|
||||||
return {"result":"failed", "detail": f"Stop training failed. {res[1]}"}
|
|
||||||
|
|
||||||
return {"result":"success", "detail": f"Stop training succeeded. {res[1]}"}
|
|
||||||
|
|
||||||
### DEBUG
|
|
||||||
def mod_get_related_files():
|
|
||||||
files = get_file_list(os.path.join(LOG_DIR,"*"))
|
|
||||||
files.extend([
|
|
||||||
"MMVC_Trainer/dataset/multi_speaker_correspondence.txt",
|
|
||||||
"MMVC_Trainer/train_ms.py",
|
|
||||||
])
|
|
||||||
files.extend(
|
|
||||||
get_file_list("MMVC_Trainer/configs/*")
|
|
||||||
)
|
|
||||||
|
|
||||||
res = []
|
|
||||||
for f in files:
|
|
||||||
size = os.path.getsize(f)
|
|
||||||
data = ""
|
|
||||||
if size < 1024*1024:
|
|
||||||
with open(f, "r") as input:
|
|
||||||
data = input.read()
|
|
||||||
|
|
||||||
res.append({
|
|
||||||
"name":f,
|
|
||||||
"size":size,
|
|
||||||
"data":data
|
|
||||||
})
|
|
||||||
|
|
||||||
json_compatible_item_data = jsonable_encoder(res)
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
|
||||||
|
|
||||||
def mod_get_tail_training_log(num:int):
|
|
||||||
training_log_file = os.path.join(LOG_DIR, "training.txt")
|
|
||||||
res = sync_exec(f"cat {training_log_file} | sed -e 's/.*\r//' > /tmp/out","/dev/null")
|
|
||||||
cmd = f'tail -n {num} /tmp/out'
|
|
||||||
res = sync_exec_with_stdout(cmd, "/dev/null")
|
|
||||||
if res[0] == ERROR:
|
|
||||||
return {"result":"failed", "detail": f"Tail training log failed. {res[1]}"}
|
|
||||||
return {"result":"success", "detail":res[1]}
|
|
@ -1,26 +0,0 @@
|
|||||||
import os
|
|
||||||
import glob
|
|
||||||
|
|
||||||
|
|
||||||
# def get_file_list(top_dir):
|
|
||||||
# for root, dirs, files in os.walk(top_dir):
|
|
||||||
# for dir in dirs:
|
|
||||||
# dirPath = os.path.join(root, dir)
|
|
||||||
# print(f'dirPath = {dirPath}')
|
|
||||||
|
|
||||||
# for file in files:
|
|
||||||
# filePath = os.path.join(root, file)
|
|
||||||
# print(f'filePath = {filePath}')
|
|
||||||
|
|
||||||
|
|
||||||
def get_dir_list(top_dir):
|
|
||||||
dirlist = []
|
|
||||||
files = os.listdir(top_dir)
|
|
||||||
for filename in files:
|
|
||||||
if os.path.isdir(os.path.join(top_dir, filename)):
|
|
||||||
dirlist.append(filename)
|
|
||||||
return dirlist
|
|
||||||
|
|
||||||
|
|
||||||
def get_file_list(top_dir):
|
|
||||||
return glob.glob(top_dir)
|
|
165
server/utils/downloader/SampleDownloader.py
Normal file
165
server/utils/downloader/SampleDownloader.py
Normal file
@ -0,0 +1,165 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from typing import Any, Tuple
|
||||||
|
|
||||||
|
from const import RVCSampleMode, getSampleJsonAndModelIds
|
||||||
|
from data.ModelSample import ModelSamples, generateModelSample
|
||||||
|
from data.ModelSlot import RVCModelSlot, loadSlotInfo, saveSlotInfo
|
||||||
|
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
||||||
|
from utils.downloader.Downloader import download, download_no_tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def downloadInitialSamples(mode: RVCSampleMode, model_dir: str):
|
||||||
|
sampleJsonUrls, sampleModels = getSampleJsonAndModelIds(mode)
|
||||||
|
sampleJsons = _downloadSampleJsons(sampleJsonUrls)
|
||||||
|
if os.path.exists(model_dir):
|
||||||
|
print("[Voice Changer] model_dir is already exists. skil download samples.")
|
||||||
|
return
|
||||||
|
samples = _generateSampleList(sampleJsons)
|
||||||
|
slotIndex = list(range(len(sampleModels)))
|
||||||
|
_downloadSamples(samples, sampleModels, model_dir, slotIndex)
|
||||||
|
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def downloadSample(mode: RVCSampleMode, modelId: str, model_dir: str, slotIndex: int, params: Any):
|
||||||
|
sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
|
||||||
|
sampleJsons = _generateSampleJsons(sampleJsonUrls)
|
||||||
|
samples = _generateSampleList(sampleJsons)
|
||||||
|
_downloadSamples(samples, [(modelId, params)], model_dir, [slotIndex])
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def getSampleInfos(mode: RVCSampleMode):
|
||||||
|
sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
|
||||||
|
sampleJsons = _generateSampleJsons(sampleJsonUrls)
|
||||||
|
samples = _generateSampleList(sampleJsons)
|
||||||
|
return samples
|
||||||
|
|
||||||
|
|
||||||
|
def _downloadSampleJsons(sampleJsonUrls: list[str]):
|
||||||
|
sampleJsons = []
|
||||||
|
for url in sampleJsonUrls:
|
||||||
|
filename = os.path.basename(url)
|
||||||
|
download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
|
||||||
|
sampleJsons.append(filename)
|
||||||
|
return sampleJsons
|
||||||
|
|
||||||
|
|
||||||
|
def _generateSampleJsons(sampleJsonUrls: list[str]):
|
||||||
|
sampleJsons = []
|
||||||
|
for url in sampleJsonUrls:
|
||||||
|
filename = os.path.basename(url)
|
||||||
|
sampleJsons.append(filename)
|
||||||
|
return sampleJsons
|
||||||
|
|
||||||
|
|
||||||
|
def _generateSampleList(sampleJsons: list[str]):
|
||||||
|
samples: list[ModelSamples] = []
|
||||||
|
for file in sampleJsons:
|
||||||
|
with open(file, "r", encoding="utf-8") as f:
|
||||||
|
jsonDict = json.load(f)
|
||||||
|
for vcType in jsonDict:
|
||||||
|
for sampleParams in jsonDict[vcType]:
|
||||||
|
sample = generateModelSample(sampleParams)
|
||||||
|
samples.append(sample)
|
||||||
|
return samples
|
||||||
|
|
||||||
|
|
||||||
|
def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str, Any]], model_dir: str, slotIndex: list[int]):
|
||||||
|
downloadParams = []
|
||||||
|
line_num = 0
|
||||||
|
|
||||||
|
for i, initSampleId in enumerate(sampleModelIds):
|
||||||
|
targetSampleId = initSampleId[0]
|
||||||
|
targetSampleParams = initSampleId[1]
|
||||||
|
tagetSlotIndex = slotIndex[i]
|
||||||
|
|
||||||
|
# 初期サンプルをサーチ
|
||||||
|
match = False
|
||||||
|
for sample in samples:
|
||||||
|
print("sample", sample)
|
||||||
|
if sample.id == targetSampleId:
|
||||||
|
match = True
|
||||||
|
break
|
||||||
|
if match is False:
|
||||||
|
print(f"[Voice Changer] initiail sample not found. {targetSampleId}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 検出されたら、、、
|
||||||
|
slotDir = os.path.join(model_dir, str(tagetSlotIndex))
|
||||||
|
if sample.voiceChangerType == "RVC":
|
||||||
|
slotInfo: RVCModelSlot = RVCModelSlot()
|
||||||
|
|
||||||
|
os.makedirs(slotDir, exist_ok=True)
|
||||||
|
modelFilePath = os.path.join(
|
||||||
|
slotDir,
|
||||||
|
os.path.basename(sample.modelUrl),
|
||||||
|
)
|
||||||
|
downloadParams.append(
|
||||||
|
{
|
||||||
|
"url": sample.modelUrl,
|
||||||
|
"saveTo": modelFilePath,
|
||||||
|
"position": line_num,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
slotInfo.modelFile = modelFilePath
|
||||||
|
line_num += 1
|
||||||
|
|
||||||
|
if targetSampleParams["useIndex"] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
|
||||||
|
indexPath = os.path.join(
|
||||||
|
slotDir,
|
||||||
|
os.path.basename(sample.indexUrl),
|
||||||
|
)
|
||||||
|
downloadParams.append(
|
||||||
|
{
|
||||||
|
"url": sample.indexUrl,
|
||||||
|
"saveTo": indexPath,
|
||||||
|
"position": line_num,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
slotInfo.indexFile = indexPath
|
||||||
|
line_num += 1
|
||||||
|
|
||||||
|
if hasattr(sample, "icon") and sample.icon != "":
|
||||||
|
iconPath = os.path.join(
|
||||||
|
slotDir,
|
||||||
|
os.path.basename(sample.icon),
|
||||||
|
)
|
||||||
|
downloadParams.append(
|
||||||
|
{
|
||||||
|
"url": sample.icon,
|
||||||
|
"saveTo": iconPath,
|
||||||
|
"position": line_num,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
slotInfo.iconFile = iconPath
|
||||||
|
line_num += 1
|
||||||
|
|
||||||
|
slotInfo.sampleId = sample.id
|
||||||
|
slotInfo.credit = sample.credit
|
||||||
|
slotInfo.description = sample.description
|
||||||
|
slotInfo.name = sample.name
|
||||||
|
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
|
||||||
|
slotInfo.defaultTune = 0
|
||||||
|
slotInfo.defaultIndexRatio = 1
|
||||||
|
slotInfo.defaultProtect = 0.5
|
||||||
|
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||||
|
saveSlotInfo(model_dir, tagetSlotIndex, slotInfo)
|
||||||
|
|
||||||
|
# ダウンロード
|
||||||
|
print("[Voice Changer] Downloading model files...")
|
||||||
|
with ThreadPoolExecutor() as pool:
|
||||||
|
pool.map(download, downloadParams)
|
||||||
|
|
||||||
|
# メタデータ作成
|
||||||
|
print("[Voice Changer] Generating metadata...")
|
||||||
|
for targetSlotIndex in slotIndex:
|
||||||
|
slotInfo = loadSlotInfo(model_dir, targetSlotIndex)
|
||||||
|
if slotInfo.voiceChangerType == "RVC":
|
||||||
|
if slotInfo.isONNX:
|
||||||
|
_setInfoByONNX(slotInfo)
|
||||||
|
else:
|
||||||
|
_setInfoByPytorch(slotInfo)
|
||||||
|
saveSlotInfo(model_dir, targetSlotIndex, slotInfo)
|
64
server/utils/downloader/WeightDownloader.py
Normal file
64
server/utils/downloader/WeightDownloader.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import os
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
from utils.downloader.Downloader import download
|
||||||
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
|
from Exceptions import WeightDownladException
|
||||||
|
|
||||||
|
|
||||||
|
def downloadWeight(voiceChangerParams: VoiceChangerParams):
    """Download every required weight file that is not already on disk.

    The destination paths are read from ``voiceChangerParams`` (``hubert_base``,
    ``hubert_base_jp``, ``hubert_soft``, ``nsf_hifigan``); the vocoder's
    ``config.json`` is expected in the same directory as ``nsf_hifigan``.

    Raises:
        WeightDownladException: if any of the five files is still missing
            after the download attempt.
    """
    base_path = voiceChangerParams.hubert_base
    base_jp_path = voiceChangerParams.hubert_base_jp
    soft_path = voiceChangerParams.hubert_soft
    vocoder_path = voiceChangerParams.nsf_hifigan
    vocoder_config_path = os.path.join(os.path.dirname(vocoder_path), "config.json")

    # (url, destination) pairs. The index of each pair is the fixed "position"
    # value handed to download(), whether or not earlier files are skipped.
    targets = [
        ("https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt", base_path),
        ("https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt", base_jp_path),
        ("https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt", soft_path),
        ("https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin", vocoder_path),
        ("https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json", vocoder_config_path),
    ]

    # file exists check (currently only for rvc)
    pending = [
        {"url": url, "saveTo": destination, "position": position}
        for position, (url, destination) in enumerate(targets)
        if not os.path.exists(destination)
    ]

    # Leaving the "with" block waits for all submitted downloads to finish.
    with ThreadPoolExecutor() as executor:
        executor.map(download, pending)

    # A file that is still absent means its download did not succeed.
    for _, destination in targets:
        if not os.path.exists(destination):
            raise WeightDownladException()
|
134
server/voice_changer/Local/ServerDevice.py
Normal file
134
server/voice_changer/Local/ServerDevice.py
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
import time
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import librosa
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
|
from voice_changer.Local.AudioDeviceList import ServerAudioDevice
|
||||||
|
from voice_changer.VoiceChanger import VoiceChanger
|
||||||
|
from voice_changer.utils.Timer import Timer
|
||||||
|
|
||||||
|
|
||||||
|
class ServerDevice:
    """Drives the voice changer from audio devices attached to the server.

    ``serverLocal`` owns a long-running loop that opens a duplex
    ``sounddevice`` stream on the configured input/output devices and lets
    ``audio_callback`` convert each block through the attached VoiceChanger.
    """

    def __init__(self):
        # Set by serverLocal(); audio_callback emits silence while it is None.
        self.voiceChanger: VoiceChanger | None = None

    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
        """Stream callback: convert one input block and write it to ``outdata``.

        ``frames``, ``times`` and ``status`` are part of the sounddevice
        callback signature and are not used here. Whenever no converted audio
        could be produced, ``outdata`` is zero-filled so the device plays
        silence instead of whatever the buffer happened to contain.
        """
        if self.voiceChanger is None:
            print("[Voice Changer] voiceChanger is None")
            outdata.fill(0)
            return

        # Tracks whether outdata already holds converted audio, so that a
        # failure in the later bookkeeping does not wipe a valid block.
        output_written = False
        try:
            indata = indata * self.voiceChanger.settings.serverInputAudioGain
            with Timer("all_inference_time") as t:
                # Down-mix to mono and scale by 32768 before conversion; the
                # result is divided by the same factor on the way back out.
                unpackedData = librosa.to_mono(indata.T) * 32768.0
                # Named inference_times so the callback's own `times`
                # argument is not shadowed.
                out_wav, inference_times = self.voiceChanger.on_request(unpackedData)
                outputChannels = outdata.shape[1]
                # Duplicate the mono result across every output channel.
                outdata[:] = np.repeat(out_wav, outputChannels).reshape(-1, outputChannels) / 32768.0
                outdata[:] = outdata * self.voiceChanger.settings.serverOutputAudioGain
                output_written = True
            # Read after the block ends (assumes Timer records `secs` on exit -- TODO confirm).
            all_inference_time = t.secs
            performance = [all_inference_time] + inference_times
            if self.voiceChanger.emitTo is not None:
                self.voiceChanger.emitTo(performance)
            self.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
        except Exception as e:
            print("[Voice Changer] ex:", e)
            if not output_written:
                outdata.fill(0)

    def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
        """Return the first device in ``audioDeviceList`` whose ``index`` matches, or None."""
        serverAudioDevice = [x for x in audioDeviceList if x.index == index]
        if len(serverAudioDevice) > 0:
            return serverAudioDevice[0]
        else:
            return None

    def serverLocal(self, _vc: VoiceChanger):
        """Run the server-side audio loop for ``_vc``; never returns.

        While server audio is stopped or no input device is selected the loop
        idles. Otherwise it opens a stream with the current settings and keeps
        it open until the device ids, the model sampling rate or the chunk
        size change, then starts over with the new settings.
        """
        self.voiceChanger = _vc
        vc = self.voiceChanger

        currentInputDeviceId = -1
        currentModelSamplingRate = -1
        currentOutputDeviceId = -1
        currentInputChunkNum = -1
        while True:
            # Test for None first: the other conditions dereference vc.
            if vc is None or vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1:
                if vc is not None:
                    vc.settings.inputSampleRate = 48000
                time.sleep(2)
                continue

            # Restart the audio backend before applying the device selection.
            sd._terminate()
            sd._initialize()

            sd.default.device[0] = vc.settings.serverInputDeviceId
            currentInputDeviceId = vc.settings.serverInputDeviceId
            sd.default.device[1] = vc.settings.serverOutputDeviceId
            currentOutputDeviceId = vc.settings.serverOutputDeviceId

            serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
            serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
            print(serverInputAudioDevice, serverOutputAudioDevice)
            if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                time.sleep(2)
                print("serverInputAudioDevice or serverOutputAudioDevice is None")
                continue

            currentInputChannelNum = serverInputAudioDevice.maxInputChannels
            currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels

            currentInputChunkNum = vc.settings.serverReadChunkSize
            block_frame = currentInputChunkNum * 128

            # sample rate precheck(alsa cannot use 40000?)
            try:
                currentModelSamplingRate = vc.voiceChangerModel.get_processing_sampling_rate()
            except Exception as e:
                print("[Voice Changer] ex: get_processing_sampling_rate", e)
                # Back off before retrying; without this the loop spins and
                # re-initialises the audio backend on every pass.
                time.sleep(2)
                continue
            try:
                # Open and immediately close a stream to see whether the
                # device accepts the model's sampling rate.
                with sd.Stream(
                    callback=self.audio_callback,
                    blocksize=block_frame,
                    samplerate=currentModelSamplingRate,
                    dtype="float32",
                    channels=[currentInputChannelNum, currentOutputChannelNum],
                ):
                    pass
                vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
                vc.settings.inputSampleRate = currentModelSamplingRate
                print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
            except Exception as e:
                print(
                    "[Voice Changer] ex: fallback to device default samplerate",
                    e,
                )
                vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
                vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate

            # main loop
            try:
                with sd.Stream(
                    callback=self.audio_callback,
                    blocksize=block_frame,
                    samplerate=vc.settings.serverInputAudioSampleRate,
                    dtype="float32",
                    channels=[currentInputChannelNum, currentOutputChannelNum],
                ):
                    # Keep the stream open until any setting it was opened with changes.
                    while (
                        vc.settings.serverAudioStated == 1
                        and currentInputDeviceId == vc.settings.serverInputDeviceId
                        and currentOutputDeviceId == vc.settings.serverOutputDeviceId
                        and currentModelSamplingRate == vc.voiceChangerModel.get_processing_sampling_rate()
                        and currentInputChunkNum == vc.settings.serverReadChunkSize
                    ):
                        time.sleep(2)
                        print(
                            "[Voice Changer] server audio",
                            vc.settings.performance,
                        )
                        print(
                            "[Voice Changer] info:",
                            vc.settings.serverAudioStated,
                            currentInputDeviceId,
                            currentOutputDeviceId,
                            vc.settings.serverInputAudioSampleRate,
                            currentInputChunkNum,
                        )

            except Exception as e:
                print("[Voice Changer] ex:", e)
                time.sleep(2)
|
@ -3,26 +3,26 @@ from const import EnumInferenceTypes, EnumEmbedderTypes
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
# @dataclass
|
||||||
class ModelSlot:
|
# class ModelSlot:
|
||||||
modelFile: str = ""
|
# modelFile: str = ""
|
||||||
indexFile: str = ""
|
# indexFile: str = ""
|
||||||
defaultTune: int = 0
|
# defaultTune: int = 0
|
||||||
defaultIndexRatio: int = 1
|
# defaultIndexRatio: int = 1
|
||||||
defaultProtect: float = 0.5
|
# defaultProtect: float = 0.5
|
||||||
isONNX: bool = False
|
# isONNX: bool = False
|
||||||
modelType: str = EnumInferenceTypes.pyTorchRVC.value
|
# modelType: str = EnumInferenceTypes.pyTorchRVC.value
|
||||||
samplingRate: int = -1
|
# samplingRate: int = -1
|
||||||
f0: bool = True
|
# f0: bool = True
|
||||||
embChannels: int = 256
|
# embChannels: int = 256
|
||||||
embOutputLayer: int = 9
|
# embOutputLayer: int = 9
|
||||||
useFinalProj: bool = True
|
# useFinalProj: bool = True
|
||||||
deprecated: bool = False
|
# deprecated: bool = False
|
||||||
embedder: str = EnumEmbedderTypes.hubert.value
|
# embedder: str = EnumEmbedderTypes.hubert.value
|
||||||
|
|
||||||
name: str = ""
|
# name: str = ""
|
||||||
description: str = ""
|
# description: str = ""
|
||||||
credit: str = ""
|
# credit: str = ""
|
||||||
termsOfUseUrl: str = ""
|
# termsOfUseUrl: str = ""
|
||||||
sampleId: str = ""
|
# sampleId: str = ""
|
||||||
iconFile: str = ""
|
# iconFile: str = ""
|
||||||
|
@ -1,9 +1,45 @@
|
|||||||
from const import EnumEmbedderTypes, EnumInferenceTypes
|
from const import UPLOAD_DIR, EnumEmbedderTypes, EnumInferenceTypes
|
||||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from data.ModelSlot import ModelSlot, RVCModelSlot, saveSlotInfo
|
||||||
|
|
||||||
|
|
||||||
|
def setSlotAsRVC(model_dir: str, slot: int, paramDict):
    """Register uploaded files as the RVC model for ``slot``.

    Every entry of ``paramDict["files"]`` (a dict with ``"name"`` and
    ``"kind"``) is moved from ``UPLOAD_DIR`` into ``<model_dir>/<slot>/``.
    Entries of kind ``"rvcModel"`` / ``"rvcIndex"`` are recorded on the slot;
    any other kind is reported but still moved. Model metadata is then read
    from the model file and the slot info is saved.
    """
    slotInfo: RVCModelSlot = RVCModelSlot()
    slotDir = os.path.join(model_dir, str(slot))
    os.makedirs(slotDir, exist_ok=True)

    print("RVC SLot Load", slot, paramDict)
    for f in paramDict["files"]:
        # Use only the final path component so a name carrying directory
        # parts (e.g. "../x") cannot make either path leave its directory.
        # NOTE(review): presumably these names come from the client upload -- confirm.
        fileName = os.path.basename(f["name"])
        srcPath = os.path.join(UPLOAD_DIR, fileName)
        dstPath = os.path.join(slotDir, fileName)
        if f["kind"] == "rvcModel":
            slotInfo.modelFile = dstPath
            # The slot's display name is the model file name without extension.
            slotInfo.name = os.path.splitext(fileName)[0]
        elif f["kind"] == "rvcIndex":
            slotInfo.indexFile = dstPath
        else:
            print(f"[Voice Changer] unknown file kind {f['kind']}")

        shutil.move(srcPath, dstPath)

    slotInfo.defaultTune = 0
    slotInfo.defaultIndexRatio = 1
    slotInfo.defaultProtect = 0.5
    slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")

    # Fill in the remaining slot fields from the model file itself.
    if slotInfo.isONNX:
        _setInfoByONNX(slotInfo)
    else:
        _setInfoByPytorch(slotInfo)

    saveSlotInfo(model_dir, slot, slotInfo)

    print("[Voice Changer] new model added:", slotInfo)
|
||||||
|
|
||||||
|
|
||||||
def _setInfoByPytorch(slot: ModelSlot):
|
def _setInfoByPytorch(slot: ModelSlot):
|
||||||
@ -15,22 +51,14 @@ def _setInfoByPytorch(slot: ModelSlot):
|
|||||||
slot.f0 = True if cpt["f0"] == 1 else False
|
slot.f0 = True if cpt["f0"] == 1 else False
|
||||||
version = cpt.get("version", "v1")
|
version = cpt.get("version", "v1")
|
||||||
if version is None or version == "v1":
|
if version is None or version == "v1":
|
||||||
slot.modelType = (
|
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
|
||||||
EnumInferenceTypes.pyTorchRVC.value
|
|
||||||
if slot.f0
|
|
||||||
else EnumInferenceTypes.pyTorchRVCNono.value
|
|
||||||
)
|
|
||||||
slot.embChannels = 256
|
slot.embChannels = 256
|
||||||
slot.embOutputLayer = 9
|
slot.embOutputLayer = 9
|
||||||
slot.useFinalProj = True
|
slot.useFinalProj = True
|
||||||
slot.embedder = EnumEmbedderTypes.hubert.value
|
slot.embedder = EnumEmbedderTypes.hubert.value
|
||||||
print("[Voice Changer] Official Model(pyTorch) : v1")
|
print("[Voice Changer] Official Model(pyTorch) : v1")
|
||||||
else:
|
else:
|
||||||
slot.modelType = (
|
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
|
||||||
EnumInferenceTypes.pyTorchRVCv2.value
|
|
||||||
if slot.f0
|
|
||||||
else EnumInferenceTypes.pyTorchRVCv2Nono.value
|
|
||||||
)
|
|
||||||
slot.embChannels = 768
|
slot.embChannels = 768
|
||||||
slot.embOutputLayer = 12
|
slot.embOutputLayer = 12
|
||||||
slot.useFinalProj = False
|
slot.useFinalProj = False
|
||||||
@ -40,37 +68,21 @@ def _setInfoByPytorch(slot: ModelSlot):
|
|||||||
else:
|
else:
|
||||||
# DDPN RVC
|
# DDPN RVC
|
||||||
slot.f0 = True if cpt["f0"] == 1 else False
|
slot.f0 = True if cpt["f0"] == 1 else False
|
||||||
slot.modelType = (
|
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
|
||||||
EnumInferenceTypes.pyTorchWebUI.value
|
|
||||||
if slot.f0
|
|
||||||
else EnumInferenceTypes.pyTorchWebUINono.value
|
|
||||||
)
|
|
||||||
slot.embChannels = cpt["config"][17]
|
slot.embChannels = cpt["config"][17]
|
||||||
slot.embOutputLayer = (
|
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
|
||||||
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
|
|
||||||
)
|
|
||||||
if slot.embChannels == 256:
|
if slot.embChannels == 256:
|
||||||
slot.useFinalProj = True
|
slot.useFinalProj = True
|
||||||
else:
|
else:
|
||||||
slot.useFinalProj = False
|
slot.useFinalProj = False
|
||||||
|
|
||||||
# DDPNモデルの情報を表示
|
# DDPNモデルの情報を表示
|
||||||
if (
|
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
|
||||||
slot.embChannels == 256
|
|
||||||
and slot.embOutputLayer == 9
|
|
||||||
and slot.useFinalProj is True
|
|
||||||
):
|
|
||||||
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
|
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
|
||||||
elif (
|
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
|
||||||
slot.embChannels == 768
|
|
||||||
and slot.embOutputLayer == 12
|
|
||||||
and slot.useFinalProj is False
|
|
||||||
):
|
|
||||||
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
|
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
|
||||||
else:
|
else:
|
||||||
print(
|
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
|
||||||
f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
|
|
||||||
)
|
|
||||||
|
|
||||||
slot.embedder = cpt["embedder_name"]
|
slot.embedder = cpt["embedder_name"]
|
||||||
if slot.embedder.endswith("768"):
|
if slot.embedder.endswith("768"):
|
||||||
@ -91,9 +103,8 @@ def _setInfoByPytorch(slot: ModelSlot):
|
|||||||
|
|
||||||
|
|
||||||
def _setInfoByONNX(slot: ModelSlot):
|
def _setInfoByONNX(slot: ModelSlot):
|
||||||
tmp_onnx_session = onnxruntime.InferenceSession(
|
print("......................................_setInfoByONNX")
|
||||||
slot.modelFile, providers=["CPUExecutionProvider"]
|
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
|
||||||
)
|
|
||||||
modelmeta = tmp_onnx_session.get_modelmeta()
|
modelmeta = tmp_onnx_session.get_modelmeta()
|
||||||
try:
|
try:
|
||||||
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
|
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
|
||||||
@ -101,16 +112,8 @@ def _setInfoByONNX(slot: ModelSlot):
|
|||||||
# slot.modelType = metadata["modelType"]
|
# slot.modelType = metadata["modelType"]
|
||||||
slot.embChannels = metadata["embChannels"]
|
slot.embChannels = metadata["embChannels"]
|
||||||
|
|
||||||
slot.embOutputLayer = (
|
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
|
||||||
metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
|
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
|
||||||
)
|
|
||||||
slot.useFinalProj = (
|
|
||||||
metadata["useFinalProj"]
|
|
||||||
if "useFinalProj" in metadata
|
|
||||||
else True
|
|
||||||
if slot.embChannels == 256
|
|
||||||
else False
|
|
||||||
)
|
|
||||||
|
|
||||||
if slot.embChannels == 256:
|
if slot.embChannels == 256:
|
||||||
slot.useFinalProj = True
|
slot.useFinalProj = True
|
||||||
@ -118,22 +121,12 @@ def _setInfoByONNX(slot: ModelSlot):
|
|||||||
slot.useFinalProj = False
|
slot.useFinalProj = False
|
||||||
|
|
||||||
# ONNXモデルの情報を表示
|
# ONNXモデルの情報を表示
|
||||||
if (
|
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
|
||||||
slot.embChannels == 256
|
|
||||||
and slot.embOutputLayer == 9
|
|
||||||
and slot.useFinalProj is True
|
|
||||||
):
|
|
||||||
print("[Voice Changer] ONNX Model: Official v1 like")
|
print("[Voice Changer] ONNX Model: Official v1 like")
|
||||||
elif (
|
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
|
||||||
slot.embChannels == 768
|
|
||||||
and slot.embOutputLayer == 12
|
|
||||||
and slot.useFinalProj is False
|
|
||||||
):
|
|
||||||
print("[Voice Changer] ONNX Model: Official v2 like")
|
print("[Voice Changer] ONNX Model: Official v2 like")
|
||||||
else:
|
else:
|
||||||
print(
|
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
|
||||||
f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if "embedder" not in metadata:
|
if "embedder" not in metadata:
|
||||||
slot.embedder = EnumEmbedderTypes.hubert.value
|
slot.embedder = EnumEmbedderTypes.hubert.value
|
||||||
@ -149,11 +142,9 @@ def _setInfoByONNX(slot: ModelSlot):
|
|||||||
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
|
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
|
||||||
|
|
||||||
slot.f0 = metadata["f0"]
|
slot.f0 = metadata["f0"]
|
||||||
slot.modelType = (
|
print("slot.modelType1", slot.modelType)
|
||||||
EnumInferenceTypes.onnxRVC.value
|
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
|
||||||
if slot.f0
|
print("slot.modelType2", slot.modelType)
|
||||||
else EnumInferenceTypes.onnxRVCNono.value
|
|
||||||
)
|
|
||||||
slot.samplingRate = metadata["samplingRate"]
|
slot.samplingRate = metadata["samplingRate"]
|
||||||
slot.deprecated = False
|
slot.deprecated = False
|
||||||
|
|
||||||
|
@ -1,14 +1,12 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
from dataclasses import asdict
|
from dataclasses import dataclass, asdict
|
||||||
from typing import cast
|
from typing import cast
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
from ModelSample import getModelSamples
|
from data.ModelSlot import loadSlotInfo
|
||||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
|
||||||
from voice_changer.RVC.SampleDownloader import downloadModelFiles
|
|
||||||
|
|
||||||
|
|
||||||
# avoiding parse arg error in RVC
|
# avoiding parse arg error in RVC
|
||||||
sys.argv = ["MMVCServerSIO.py"]
|
sys.argv = ["MMVCServerSIO.py"]
|
||||||
@ -31,7 +29,6 @@ from voice_changer.RVC.ModelSlotGenerator import (
|
|||||||
)
|
)
|
||||||
from voice_changer.RVC.RVCSettings import RVCSettings
|
from voice_changer.RVC.RVCSettings import RVCSettings
|
||||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
|
||||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
|
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
|
||||||
@ -40,147 +37,104 @@ from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline
|
|||||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||||
from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
||||||
|
|
||||||
from Exceptions import DeviceCannotSupportHalfPrecisionException, NoModeLoadedException
|
from Exceptions import DeviceCannotSupportHalfPrecisionException
|
||||||
from const import (
|
from const import (
|
||||||
RVC_MODEL_DIRNAME,
|
|
||||||
UPLOAD_DIR,
|
UPLOAD_DIR,
|
||||||
getRVCSampleJsonAndModelIds,
|
|
||||||
)
|
)
|
||||||
import shutil
|
import shutil
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
class RVC:
|
class RVC:
|
||||||
initialLoad: bool = True
|
# initialLoad: bool = True
|
||||||
settings: RVCSettings = RVCSettings()
|
# currentSlot: int = 0
|
||||||
|
# needSwitch: bool = False
|
||||||
|
|
||||||
pipeline: Pipeline | None = None
|
def __init__(self, slotIndex: int, params: VoiceChangerParams):
|
||||||
|
self.params: VoiceChangerParams = params
|
||||||
deviceManager = DeviceManager.get_instance()
|
self.settings: RVCSettings = RVCSettings()
|
||||||
|
self.deviceManager = DeviceManager.get_instance()
|
||||||
audio_buffer: AudioInOut | None = None
|
|
||||||
prevVol: float = 0
|
|
||||||
params: VoiceChangerParams
|
|
||||||
currentSlot: int = 0
|
|
||||||
needSwitch: bool = False
|
|
||||||
|
|
||||||
def __init__(self, params: VoiceChangerParams):
|
|
||||||
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(
|
|
||||||
self.settings.f0Detector
|
|
||||||
)
|
|
||||||
self.params = params
|
|
||||||
EmbedderManager.initialize(params)
|
EmbedderManager.initialize(params)
|
||||||
self.loadSlots()
|
|
||||||
print("[Voice Changer] RVC initialization: ", params)
|
|
||||||
|
|
||||||
# サンプルカタログ作成
|
self.pipeline: Pipeline | None = None
|
||||||
sampleJsons: list[str] = []
|
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
|
||||||
sampleJsonUrls, _sampleModels = getRVCSampleJsonAndModelIds(
|
|
||||||
params.rvc_sample_mode
|
|
||||||
)
|
|
||||||
for url in sampleJsonUrls:
|
|
||||||
filename = os.path.basename(url)
|
|
||||||
sampleJsons.append(filename)
|
|
||||||
sampleModels = getModelSamples(sampleJsons, "RVC")
|
|
||||||
if sampleModels is not None:
|
|
||||||
self.settings.sampleModels = sampleModels
|
|
||||||
|
|
||||||
# 起動時にスロットにモデルがある場合はロードしておく
|
self.audio_buffer: AudioInOut | None = None
|
||||||
if len(self.settings.modelSlots) > 0:
|
|
||||||
for i, slot in enumerate(self.settings.modelSlots):
|
print("[Voice Changer] RVC Slot initialization. global params:", params)
|
||||||
if len(slot.modelFile) > 0:
|
|
||||||
self.prepareModel(i)
|
self.slotIndex = slotIndex
|
||||||
self.settings.modelSlotIndex = i
|
self.slotInfo: RVCSlotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
|
||||||
self.switchModel(self.settings.modelSlotIndex)
|
|
||||||
self.initialLoad = False
|
|
||||||
break
|
|
||||||
self.prevVol = 0.0
|
self.prevVol = 0.0
|
||||||
|
print("[Voice Changer] RVC Slot initialization. slot info:", self.slotInfo)
|
||||||
|
|
||||||
def getSampleInfo(self, id: str):
|
# def loadModel(self, props: LoadModelParams):
|
||||||
sampleInfos = list(filter(lambda x: x.id == id, self.settings.sampleModels))
|
# target_slot_idx = props.slot
|
||||||
if len(sampleInfos) > 0:
|
# params = props.params
|
||||||
return sampleInfos[0]
|
# slotInfo: ModelSlot = ModelSlot()
|
||||||
else:
|
|
||||||
None
|
|
||||||
|
|
||||||
def moveToModelDir(self, file: str, dstDir: str):
|
# print("loadModel", params)
|
||||||
dst = os.path.join(dstDir, os.path.basename(file))
|
# # サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
|
||||||
if os.path.exists(dst):
|
# if len(params["sampleId"]) > 0:
|
||||||
os.remove(dst)
|
# sampleId = params["sampleId"]
|
||||||
shutil.move(file, dst)
|
# sampleInfo = self.getSampleInfo(sampleId)
|
||||||
return dst
|
# useIndex = params["rvcIndexDownload"]
|
||||||
|
|
||||||
def loadModel(self, props: LoadModelParams):
|
# if sampleInfo is None:
|
||||||
target_slot_idx = props.slot
|
# print("[Voice Changer] sampleInfo is None")
|
||||||
params = props.params
|
# return
|
||||||
slotInfo: ModelSlot = ModelSlot()
|
# modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
|
||||||
|
# slotInfo.modelFile = modelPath
|
||||||
|
# if indexPath is not None:
|
||||||
|
# slotInfo.indexFile = indexPath
|
||||||
|
# if iconPath is not None:
|
||||||
|
# slotInfo.iconFile = iconPath
|
||||||
|
|
||||||
print("loadModel", params)
|
# slotInfo.sampleId = sampleInfo.id
|
||||||
# サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
|
# slotInfo.credit = sampleInfo.credit
|
||||||
if len(params["sampleId"]) > 0:
|
# slotInfo.description = sampleInfo.description
|
||||||
sampleId = params["sampleId"]
|
# slotInfo.name = sampleInfo.name
|
||||||
sampleInfo = self.getSampleInfo(sampleId)
|
# slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
|
||||||
useIndex = params["rvcIndexDownload"]
|
|
||||||
|
|
||||||
if sampleInfo is None:
|
# # slotInfo.samplingRate = sampleInfo.sampleRate
|
||||||
print("[Voice Changer] sampleInfo is None")
|
# # slotInfo.modelType = sampleInfo.modelType
|
||||||
return
|
# # slotInfo.f0 = sampleInfo.f0
|
||||||
modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
|
# else:
|
||||||
slotInfo.modelFile = modelPath
|
# slotInfo.modelFile = params["files"]["rvcModel"]
|
||||||
if indexPath is not None:
|
# slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
|
||||||
slotInfo.indexFile = indexPath
|
|
||||||
if iconPath is not None:
|
|
||||||
slotInfo.iconFile = iconPath
|
|
||||||
|
|
||||||
slotInfo.sampleId = sampleInfo.id
|
# slotInfo.defaultTune = params["defaultTune"]
|
||||||
slotInfo.credit = sampleInfo.credit
|
# slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
|
||||||
slotInfo.description = sampleInfo.description
|
# slotInfo.defaultProtect = params["defaultProtect"]
|
||||||
slotInfo.name = sampleInfo.name
|
# slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||||
slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
|
|
||||||
|
|
||||||
# slotInfo.samplingRate = sampleInfo.sampleRate
|
# if slotInfo.isONNX:
|
||||||
# slotInfo.modelType = sampleInfo.modelType
|
# _setInfoByONNX(slotInfo)
|
||||||
# slotInfo.f0 = sampleInfo.f0
|
# else:
|
||||||
else:
|
# _setInfoByPytorch(slotInfo)
|
||||||
slotInfo.modelFile = params["files"]["rvcModel"]
|
|
||||||
slotInfo.indexFile = (
|
|
||||||
params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
|
|
||||||
)
|
|
||||||
|
|
||||||
slotInfo.defaultTune = params["defaultTune"]
|
# # メタデータを見て、永続化モデルフォルダに移動させる
|
||||||
slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
|
# # その際に、メタデータのファイル格納場所も書き換える
|
||||||
slotInfo.defaultProtect = params["defaultProtect"]
|
# slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx))
|
||||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
# os.makedirs(slotDir, exist_ok=True)
|
||||||
|
# slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
|
||||||
|
# if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
|
||||||
|
# slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
|
||||||
|
# if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
|
||||||
|
# slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
|
||||||
|
# json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
||||||
|
# self.loadSlots()
|
||||||
|
|
||||||
if slotInfo.isONNX:
|
# # 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
|
||||||
_setInfoByONNX(slotInfo)
|
# if self.initialLoad:
|
||||||
else:
|
# self.prepareModel(target_slot_idx)
|
||||||
_setInfoByPytorch(slotInfo)
|
# self.settings.modelSlotIndex = target_slot_idx
|
||||||
|
# self.switchModel(self.settings.modelSlotIndex)
|
||||||
|
# self.initialLoad = False
|
||||||
|
# elif target_slot_idx == self.currentSlot:
|
||||||
|
# self.prepareModel(target_slot_idx)
|
||||||
|
|
||||||
# メタデータを見て、永続化モデルフォルダに移動させる
|
# return self.get_info()
|
||||||
# その際に、メタデータのファイル格納場所も書き換える
|
|
||||||
slotDir = os.path.join(
|
|
||||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx)
|
|
||||||
)
|
|
||||||
os.makedirs(slotDir, exist_ok=True)
|
|
||||||
slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
|
|
||||||
if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
|
|
||||||
slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
|
|
||||||
if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
|
|
||||||
slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
|
|
||||||
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
|
||||||
self.loadSlots()
|
|
||||||
|
|
||||||
# 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
|
|
||||||
if self.initialLoad:
|
|
||||||
self.prepareModel(target_slot_idx)
|
|
||||||
self.settings.modelSlotIndex = target_slot_idx
|
|
||||||
self.switchModel(self.settings.modelSlotIndex)
|
|
||||||
self.initialLoad = False
|
|
||||||
elif target_slot_idx == self.currentSlot:
|
|
||||||
self.prepareModel(target_slot_idx)
|
|
||||||
|
|
||||||
return self.get_info()
|
|
||||||
|
|
||||||
def loadSlots(self):
|
def loadSlots(self):
|
||||||
dirname = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME)
|
dirname = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME)
|
||||||
@ -189,9 +143,7 @@ class RVC:
|
|||||||
|
|
||||||
modelSlots: list[ModelSlot] = []
|
modelSlots: list[ModelSlot] = []
|
||||||
for slot_idx in range(len(self.settings.modelSlots)):
|
for slot_idx in range(len(self.settings.modelSlots)):
|
||||||
slotDir = os.path.join(
|
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx))
|
||||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx)
|
|
||||||
)
|
|
||||||
jsonDict = os.path.join(slotDir, "params.json")
|
jsonDict = os.path.join(slotDir, "params.json")
|
||||||
if os.path.exists(jsonDict):
|
if os.path.exists(jsonDict):
|
||||||
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
|
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
|
||||||
@ -205,76 +157,56 @@ class RVC:
|
|||||||
if key in self.settings.intData:
|
if key in self.settings.intData:
|
||||||
# 設定前処理
|
# 設定前処理
|
||||||
val = cast(int, val)
|
val = cast(int, val)
|
||||||
if key == "modelSlotIndex":
|
|
||||||
if val < 0:
|
|
||||||
return True
|
|
||||||
val = val % 1000 # Quick hack for same slot is selected
|
|
||||||
if (
|
|
||||||
self.settings.modelSlots[val].modelFile is None
|
|
||||||
or self.settings.modelSlots[val].modelFile == ""
|
|
||||||
):
|
|
||||||
print("[Voice Changer] slot does not have model.")
|
|
||||||
return True
|
|
||||||
self.prepareModel(val)
|
|
||||||
|
|
||||||
# 設定
|
# 設定
|
||||||
setattr(self.settings, key, val)
|
setattr(self.settings, key, val)
|
||||||
|
|
||||||
if key == "gpu":
|
if key == "gpu":
|
||||||
self.deviceManager.setForceTensor(False)
|
self.deviceManager.setForceTensor(False)
|
||||||
self.prepareModel(self.settings.modelSlotIndex)
|
self.prepareModel()
|
||||||
|
|
||||||
elif key in self.settings.floatData:
|
elif key in self.settings.floatData:
|
||||||
setattr(self.settings, key, float(val))
|
setattr(self.settings, key, float(val))
|
||||||
elif key in self.settings.strData:
|
elif key in self.settings.strData:
|
||||||
setattr(self.settings, key, str(val))
|
setattr(self.settings, key, str(val))
|
||||||
if key == "f0Detector" and self.pipeline is not None:
|
if key == "f0Detector" and self.pipeline is not None:
|
||||||
pitchExtractor = PitchExtractorManager.getPitchExtractor(
|
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
|
||||||
self.settings.f0Detector
|
|
||||||
)
|
|
||||||
self.pipeline.setPitchExtractor(pitchExtractor)
|
self.pipeline.setPitchExtractor(pitchExtractor)
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def prepareModel(self, slot: int):
|
def prepareModel(self):
|
||||||
if slot < 0:
|
print("[Voice Changer] Prepare Model of slot:", self.slotIndex)
|
||||||
print("[Voice Changer] Prepare Model of slot skip:", slot)
|
|
||||||
return self.get_info()
|
|
||||||
modelSlot = self.settings.modelSlots[slot]
|
|
||||||
|
|
||||||
print("[Voice Changer] Prepare Model of slot:", slot)
|
|
||||||
|
|
||||||
# pipelineの生成
|
# pipelineの生成
|
||||||
self.next_pipeline = createPipeline(
|
self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
|
||||||
modelSlot, self.settings.gpu, self.settings.f0Detector
|
|
||||||
)
|
|
||||||
|
|
||||||
# その他の設定
|
# # その他の設定
|
||||||
self.next_trans = modelSlot.defaultTune
|
# self.next_trans = modelSlot.defaultTune
|
||||||
self.next_index_ratio = modelSlot.defaultIndexRatio
|
# self.next_index_ratio = modelSlot.defaultIndexRatio
|
||||||
self.next_protect = modelSlot.defaultProtect
|
# self.next_protect = modelSlot.defaultProtect
|
||||||
self.next_samplingRate = modelSlot.samplingRate
|
# self.next_samplingRate = modelSlot.samplingRate
|
||||||
self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
|
# self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
|
||||||
# self.needSwitch = True
|
# # self.needSwitch = True
|
||||||
print("[Voice Changer] Prepare done.")
|
# print("[Voice Changer] Prepare done.")
|
||||||
self.switchModel(slot)
|
# self.switchModel(slot)
|
||||||
return self.get_info()
|
return self.get_info()
|
||||||
|
|
||||||
def switchModel(self, slot: int):
|
# def switchModel(self, slot: int):
|
||||||
print("[Voice Changer] Switching model..")
|
# print("[Voice Changer] Switching model..")
|
||||||
self.pipeline = self.next_pipeline
|
# self.pipeline = self.next_pipeline
|
||||||
self.settings.tran = self.next_trans
|
# self.settings.tran = self.next_trans
|
||||||
self.settings.indexRatio = self.next_index_ratio
|
# self.settings.indexRatio = self.next_index_ratio
|
||||||
self.settings.protect = self.next_protect
|
# self.settings.protect = self.next_protect
|
||||||
self.settings.modelSamplingRate = self.next_samplingRate
|
# self.settings.modelSamplingRate = self.next_samplingRate
|
||||||
self.settings.framework = self.next_framework
|
# self.settings.framework = self.next_framework
|
||||||
|
|
||||||
# self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない
|
# # self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない
|
||||||
self.currentSlot = slot
|
# self.currentSlot = slot
|
||||||
print(
|
# print(
|
||||||
"[Voice Changer] Switching model..done",
|
# "[Voice Changer] Switching model..done",
|
||||||
)
|
# )
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
data = asdict(self.settings)
|
data = asdict(self.settings)
|
||||||
@ -293,9 +225,7 @@ class RVC:
|
|||||||
crossfadeSize: int,
|
crossfadeSize: int,
|
||||||
solaSearchFrame: int = 0,
|
solaSearchFrame: int = 0,
|
||||||
):
|
):
|
||||||
newData = (
|
newData = newData.astype(np.float32) / 32768.0 # RVCのモデルのサンプリングレートで入ってきている。(extraDataLength, Crossfade等も同じSRで処理)(★1)
|
||||||
newData.astype(np.float32) / 32768.0
|
|
||||||
) # RVCのモデルのサンプリングレートで入ってきている。(extraDataLength, Crossfade等も同じSRで処理)(★1)
|
|
||||||
|
|
||||||
if self.audio_buffer is not None:
|
if self.audio_buffer is not None:
|
||||||
# 過去のデータに連結
|
# 過去のデータに連結
|
||||||
@ -303,18 +233,14 @@ class RVC:
|
|||||||
else:
|
else:
|
||||||
self.audio_buffer = newData
|
self.audio_buffer = newData
|
||||||
|
|
||||||
convertSize = (
|
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||||
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
|
||||||
)
|
|
||||||
|
|
||||||
if convertSize % 128 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
if convertSize % 128 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
||||||
convertSize = convertSize + (128 - (convertSize % 128))
|
convertSize = convertSize + (128 - (convertSize % 128))
|
||||||
|
|
||||||
# バッファがたまっていない場合はzeroで補う
|
# バッファがたまっていない場合はzeroで補う
|
||||||
if self.audio_buffer.shape[0] < convertSize:
|
if self.audio_buffer.shape[0] < convertSize:
|
||||||
self.audio_buffer = np.concatenate(
|
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
|
||||||
[np.zeros([convertSize]), self.audio_buffer]
|
|
||||||
)
|
|
||||||
|
|
||||||
convertOffset = -1 * convertSize
|
convertOffset = -1 * convertSize
|
||||||
self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出
|
self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出
|
||||||
@ -324,9 +250,7 @@ class RVC:
|
|||||||
else:
|
else:
|
||||||
device = torch.device("cpu")
|
device = torch.device("cpu")
|
||||||
|
|
||||||
audio_buffer = torch.from_numpy(self.audio_buffer).to(
|
audio_buffer = torch.from_numpy(self.audio_buffer).to(device=device, dtype=torch.float32)
|
||||||
device=device, dtype=torch.float32
|
|
||||||
)
|
|
||||||
|
|
||||||
# 出力部分だけ切り出して音量を確認。(TODO:段階的消音にする)
|
# 出力部分だけ切り出して音量を確認。(TODO:段階的消音にする)
|
||||||
cropOffset = -1 * (inputSize + crossfadeSize)
|
cropOffset = -1 * (inputSize + crossfadeSize)
|
||||||
@ -339,21 +263,14 @@ class RVC:
|
|||||||
return (audio_buffer, convertSize, vol)
|
return (audio_buffer, convertSize, vol)
|
||||||
|
|
||||||
def inference(self, data):
|
def inference(self, data):
|
||||||
if self.settings.modelSlotIndex < 0:
|
# if self.settings.modelSlotIndex < 0:
|
||||||
print(
|
|
||||||
"[Voice Changer] wait for loading model...",
|
|
||||||
self.settings.modelSlotIndex,
|
|
||||||
self.currentSlot,
|
|
||||||
)
|
|
||||||
raise NoModeLoadedException("model_common")
|
|
||||||
# if self.needSwitch:
|
|
||||||
# print(
|
# print(
|
||||||
# f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}"
|
# "[Voice Changer] wait for loading model...",
|
||||||
|
# self.settings.modelSlotIndex,
|
||||||
|
# self.currentSlot,
|
||||||
# )
|
# )
|
||||||
# self.switchModel()
|
# raise NoModeLoadedException("model_common")
|
||||||
# self.needSwitch = False
|
|
||||||
|
|
||||||
# half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu)
|
|
||||||
half = self.pipeline.isHalf
|
half = self.pipeline.isHalf
|
||||||
|
|
||||||
audio = data[0]
|
audio = data[0]
|
||||||
@ -363,18 +280,16 @@ class RVC:
|
|||||||
if vol < self.settings.silentThreshold:
|
if vol < self.settings.silentThreshold:
|
||||||
return np.zeros(convertSize).astype(np.int16)
|
return np.zeros(convertSize).astype(np.int16)
|
||||||
|
|
||||||
audio = torchaudio.functional.resample(
|
audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99)
|
||||||
audio, self.settings.modelSamplingRate, 16000, rolloff=0.99
|
|
||||||
)
|
|
||||||
repeat = 3 if half else 1
|
repeat = 3 if half else 1
|
||||||
repeat *= self.settings.rvcQuality # 0 or 3
|
repeat *= self.settings.rvcQuality # 0 or 3
|
||||||
sid = 0
|
sid = 0
|
||||||
f0_up_key = self.settings.tran
|
f0_up_key = self.settings.tran
|
||||||
index_rate = self.settings.indexRatio
|
index_rate = self.settings.indexRatio
|
||||||
protect = self.settings.protect
|
protect = self.settings.protect
|
||||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
if_f0 = 1 if self.slotInfo.f0 else 0
|
||||||
embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer
|
embOutputLayer = self.slotInfo.embOutputLayer
|
||||||
useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
|
useFinalProj = self.slotInfo.useFinalProj
|
||||||
try:
|
try:
|
||||||
audio_out = self.pipeline.exec(
|
audio_out = self.pipeline.exec(
|
||||||
sid,
|
sid,
|
||||||
@ -382,8 +297,7 @@ class RVC:
|
|||||||
f0_up_key,
|
f0_up_key,
|
||||||
index_rate,
|
index_rate,
|
||||||
if_f0,
|
if_f0,
|
||||||
self.settings.extraConvertSize
|
self.settings.extraConvertSize / self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
|
||||||
/ self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
|
|
||||||
embOutputLayer,
|
embOutputLayer,
|
||||||
useFinalProj,
|
useFinalProj,
|
||||||
repeat,
|
repeat,
|
||||||
@ -393,9 +307,7 @@ class RVC:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
except DeviceCannotSupportHalfPrecisionException as e:
|
except DeviceCannotSupportHalfPrecisionException as e:
|
||||||
print(
|
print("[Device Manager] Device cannot support half precision. Fallback to float....")
|
||||||
"[Device Manager] Device cannot support half precision. Fallback to float...."
|
|
||||||
)
|
|
||||||
self.deviceManager.setForceTensor(True)
|
self.deviceManager.setForceTensor(True)
|
||||||
self.prepareModel(self.settings.modelSlotIndex)
|
self.prepareModel(self.settings.modelSlotIndex)
|
||||||
raise e
|
raise e
|
||||||
@ -405,7 +317,7 @@ class RVC:
|
|||||||
def __del__(self):
|
def __del__(self):
|
||||||
del self.pipeline
|
del self.pipeline
|
||||||
|
|
||||||
# print("---------- REMOVING ---------------")
|
print("---------- REMOVING ---------------")
|
||||||
|
|
||||||
remove_path = os.path.join("RVC")
|
remove_path = os.path.join("RVC")
|
||||||
sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
|
sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
|
||||||
@ -461,9 +373,7 @@ class RVC:
|
|||||||
"sampleId": "",
|
"sampleId": "",
|
||||||
"files": {"rvcModel": storeFile},
|
"files": {"rvcModel": storeFile},
|
||||||
}
|
}
|
||||||
props: LoadModelParams = LoadModelParams(
|
props: LoadModelParams = LoadModelParams(slot=targetSlot, isHalf=True, params=params)
|
||||||
slot=targetSlot, isHalf=True, params=params
|
|
||||||
)
|
|
||||||
self.loadModel(props)
|
self.loadModel(props)
|
||||||
self.prepareModel(targetSlot)
|
self.prepareModel(targetSlot)
|
||||||
self.settings.modelSlotIndex = targetSlot
|
self.settings.modelSlotIndex = targetSlot
|
||||||
@ -471,12 +381,8 @@ class RVC:
|
|||||||
|
|
||||||
def update_model_default(self):
|
def update_model_default(self):
|
||||||
print("[Voice Changer] UPDATE MODEL DEFAULT!!")
|
print("[Voice Changer] UPDATE MODEL DEFAULT!!")
|
||||||
slotDir = os.path.join(
|
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot))
|
||||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot)
|
params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
|
||||||
)
|
|
||||||
params = json.load(
|
|
||||||
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
|
|
||||||
)
|
|
||||||
params["defaultTune"] = self.settings.tran
|
params["defaultTune"] = self.settings.tran
|
||||||
params["defaultIndexRatio"] = self.settings.indexRatio
|
params["defaultIndexRatio"] = self.settings.indexRatio
|
||||||
params["defaultProtect"] = self.settings.protect
|
params["defaultProtect"] = self.settings.protect
|
||||||
@ -488,14 +394,10 @@ class RVC:
|
|||||||
print("[Voice Changer] UPDATE MODEL INFO", newData)
|
print("[Voice Changer] UPDATE MODEL INFO", newData)
|
||||||
newDataDict = json.loads(newData)
|
newDataDict = json.loads(newData)
|
||||||
try:
|
try:
|
||||||
slotDir = os.path.join(
|
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"]))
|
||||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"])
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Exception::::", e)
|
print("Exception::::", e)
|
||||||
params = json.load(
|
params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
|
||||||
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
|
|
||||||
)
|
|
||||||
params[newDataDict["key"]] = newDataDict["val"]
|
params[newDataDict["key"]] = newDataDict["val"]
|
||||||
json.dump(params, open(os.path.join(slotDir, "params.json"), "w"))
|
json.dump(params, open(os.path.join(slotDir, "params.json"), "w"))
|
||||||
self.loadSlots()
|
self.loadSlots()
|
||||||
@ -504,9 +406,7 @@ class RVC:
|
|||||||
print("[Voice Changer] UPLOAD ASSETS", params)
|
print("[Voice Changer] UPLOAD ASSETS", params)
|
||||||
paramsDict = json.loads(params)
|
paramsDict = json.loads(params)
|
||||||
uploadPath = os.path.join(UPLOAD_DIR, paramsDict["file"])
|
uploadPath = os.path.join(UPLOAD_DIR, paramsDict["file"])
|
||||||
storeDir = os.path.join(
|
storeDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"]))
|
||||||
self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"])
|
|
||||||
)
|
|
||||||
storePath = os.path.join(
|
storePath = os.path.join(
|
||||||
storeDir,
|
storeDir,
|
||||||
paramsDict["file"],
|
paramsDict["file"],
|
||||||
|
@ -1,8 +1,4 @@
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from ModelSample import RVCModelSample
|
|
||||||
from const import RVC_MAX_SLOT_NUM
|
|
||||||
|
|
||||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -17,11 +13,6 @@ class RVCSettings:
|
|||||||
clusterInferRatio: float = 0.1
|
clusterInferRatio: float = 0.1
|
||||||
|
|
||||||
framework: str = "PyTorch" # PyTorch or ONNX
|
framework: str = "PyTorch" # PyTorch or ONNX
|
||||||
modelSlots: list[ModelSlot] = field(
|
|
||||||
default_factory=lambda: [ModelSlot() for _x in range(RVC_MAX_SLOT_NUM)]
|
|
||||||
)
|
|
||||||
|
|
||||||
sampleModels: list[RVCModelSample] = field(default_factory=lambda: [])
|
|
||||||
|
|
||||||
indexRatio: float = 0
|
indexRatio: float = 0
|
||||||
protect: float = 0.5
|
protect: float = 0.5
|
||||||
|
28
server/voice_changer/RVC/RVCSlotInfo.py
Normal file
28
server/voice_changer/RVC/RVCSlotInfo.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from voice_changer.VoiceChanger import SlotInfo
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RVCSlotInfo(SlotInfo):
|
||||||
|
modelFile: str = ""
|
||||||
|
indexFile: str = ""
|
||||||
|
defaultTune: int = 0
|
||||||
|
defaultIndexRatio: float = 0
|
||||||
|
defaultProtect: float = 1
|
||||||
|
isONNX: bool = False
|
||||||
|
modelType: str = ""
|
||||||
|
samplingRate: int = 40000
|
||||||
|
f0: bool = True
|
||||||
|
embChannels: int = 256
|
||||||
|
embOutputLayer: int = 12
|
||||||
|
useFinalProj: bool = False
|
||||||
|
deprecated: bool = False
|
||||||
|
embedder: str = ""
|
||||||
|
|
||||||
|
name: str = ""
|
||||||
|
description: str = ""
|
||||||
|
credit: str = ""
|
||||||
|
termsOfUseUrl: str = ""
|
||||||
|
sampleId: str = ""
|
||||||
|
iconFile: str = ""
|
@ -1,174 +1,52 @@
|
|||||||
from concurrent.futures import ThreadPoolExecutor
|
# from concurrent.futures import ThreadPoolExecutor
|
||||||
from dataclasses import asdict
|
# from dataclasses import asdict
|
||||||
import os
|
# import os
|
||||||
from const import RVC_MODEL_DIRNAME, TMP_DIR
|
# from const import RVC_MODEL_DIRNAME, TMP_DIR
|
||||||
from Downloader import download, download_no_tqdm
|
# from Downloader import download, download_no_tqdm
|
||||||
from ModelSample import RVCModelSample, getModelSamples
|
# from ModelSample import RVCModelSample, getModelSamples
|
||||||
import json
|
# import json
|
||||||
|
|
||||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
# from voice_changer.RVC.ModelSlot import ModelSlot
|
||||||
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
# from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
|
||||||
|
|
||||||
|
|
||||||
def checkRvcModelExist(model_dir: str):
|
# def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
|
||||||
rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME)
|
# downloadParams = []
|
||||||
if not os.path.exists(rvcModelDir):
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
# modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
|
||||||
|
# downloadParams.append(
|
||||||
|
# {
|
||||||
|
# "url": sampleInfo.modelUrl,
|
||||||
|
# "saveTo": modelPath,
|
||||||
|
# "position": 0,
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
def downloadInitialSampleModels(
|
# indexPath = None
|
||||||
sampleJsons: list[str], sampleModelIds: list[str], model_dir: str
|
# if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
|
||||||
):
|
# print("[Voice Changer] Download sample with index.")
|
||||||
sampleModels = getModelSamples(sampleJsons, "RVC")
|
# indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
|
||||||
if sampleModels is None:
|
# downloadParams.append(
|
||||||
return
|
# {
|
||||||
|
# "url": sampleInfo.indexUrl,
|
||||||
|
# "saveTo": indexPath,
|
||||||
|
# "position": 1,
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
downloadParams = []
|
# iconPath = None
|
||||||
slot_count = 0
|
# if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
|
||||||
line_num = 0
|
# iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
|
||||||
for initSampleId in sampleModelIds:
|
# downloadParams.append(
|
||||||
# 初期サンプルをサーチ
|
# {
|
||||||
match = False
|
# "url": sampleInfo.icon,
|
||||||
for sample in sampleModels:
|
# "saveTo": iconPath,
|
||||||
if sample.id == initSampleId[0]:
|
# "position": 2,
|
||||||
match = True
|
# }
|
||||||
break
|
# )
|
||||||
if match is False:
|
|
||||||
print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 検出されたら、、、
|
# print("[Voice Changer] Downloading model files...", end="")
|
||||||
slotInfo: ModelSlot = ModelSlot()
|
# with ThreadPoolExecutor() as pool:
|
||||||
# sampleParams: Any = {"files": {}}
|
# pool.map(download_no_tqdm, downloadParams)
|
||||||
|
# print("")
|
||||||
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
|
# return modelPath, indexPath, iconPath
|
||||||
os.makedirs(slotDir, exist_ok=True)
|
|
||||||
modelFilePath = os.path.join(
|
|
||||||
slotDir,
|
|
||||||
os.path.basename(sample.modelUrl),
|
|
||||||
)
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": sample.modelUrl,
|
|
||||||
"saveTo": modelFilePath,
|
|
||||||
"position": line_num,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
slotInfo.modelFile = modelFilePath
|
|
||||||
line_num += 1
|
|
||||||
|
|
||||||
if (
|
|
||||||
initSampleId[1] is True
|
|
||||||
and hasattr(sample, "indexUrl")
|
|
||||||
and sample.indexUrl != ""
|
|
||||||
):
|
|
||||||
indexPath = os.path.join(
|
|
||||||
slotDir,
|
|
||||||
os.path.basename(sample.indexUrl),
|
|
||||||
)
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": sample.indexUrl,
|
|
||||||
"saveTo": indexPath,
|
|
||||||
"position": line_num,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
slotInfo.indexFile = indexPath
|
|
||||||
line_num += 1
|
|
||||||
if hasattr(sample, "icon") and sample.icon != "":
|
|
||||||
iconPath = os.path.join(
|
|
||||||
slotDir,
|
|
||||||
os.path.basename(sample.icon),
|
|
||||||
)
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": sample.icon,
|
|
||||||
"saveTo": iconPath,
|
|
||||||
"position": line_num,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
slotInfo.iconFile = iconPath
|
|
||||||
line_num += 1
|
|
||||||
|
|
||||||
slotInfo.sampleId = sample.id
|
|
||||||
slotInfo.credit = sample.credit
|
|
||||||
slotInfo.description = sample.description
|
|
||||||
slotInfo.name = sample.name
|
|
||||||
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
|
|
||||||
slotInfo.defaultTune = 0
|
|
||||||
slotInfo.defaultIndexRatio = 1
|
|
||||||
slotInfo.defaultProtect = 0.5
|
|
||||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
|
||||||
|
|
||||||
# この時点ではまだファイルはダウンロードされていない
|
|
||||||
# if slotInfo.isONNX:
|
|
||||||
# _setInfoByONNX(slotInfo)
|
|
||||||
# else:
|
|
||||||
# _setInfoByPytorch(slotInfo)
|
|
||||||
|
|
||||||
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
|
||||||
slot_count += 1
|
|
||||||
|
|
||||||
# ダウンロード
|
|
||||||
print("[Voice Changer] Downloading model files...")
|
|
||||||
with ThreadPoolExecutor() as pool:
|
|
||||||
pool.map(download, downloadParams)
|
|
||||||
|
|
||||||
# メタデータ作成
|
|
||||||
print("[Voice Changer] Generating metadata...")
|
|
||||||
for slotId in range(slot_count):
|
|
||||||
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
|
|
||||||
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
|
|
||||||
slotInfo = ModelSlot(**jsonDict)
|
|
||||||
if slotInfo.isONNX:
|
|
||||||
_setInfoByONNX(slotInfo)
|
|
||||||
else:
|
|
||||||
_setInfoByPytorch(slotInfo)
|
|
||||||
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
|
|
||||||
|
|
||||||
|
|
||||||
def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
|
|
||||||
downloadParams = []
|
|
||||||
|
|
||||||
modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": sampleInfo.modelUrl,
|
|
||||||
"saveTo": modelPath,
|
|
||||||
"position": 0,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
indexPath = None
|
|
||||||
if (
|
|
||||||
useIndex is True
|
|
||||||
and hasattr(sampleInfo, "indexUrl")
|
|
||||||
and sampleInfo.indexUrl != ""
|
|
||||||
):
|
|
||||||
print("[Voice Changer] Download sample with index.")
|
|
||||||
indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": sampleInfo.indexUrl,
|
|
||||||
"saveTo": indexPath,
|
|
||||||
"position": 1,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
iconPath = None
|
|
||||||
if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
|
|
||||||
iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
|
|
||||||
downloadParams.append(
|
|
||||||
{
|
|
||||||
"url": sampleInfo.icon,
|
|
||||||
"saveTo": iconPath,
|
|
||||||
"position": 2,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
print("[Voice Changer] Downloading model files...", end="")
|
|
||||||
with ThreadPoolExecutor() as pool:
|
|
||||||
pool.map(download_no_tqdm, downloadParams)
|
|
||||||
print("")
|
|
||||||
return modelPath, indexPath, iconPath
|
|
||||||
|
@ -14,7 +14,7 @@ class RVCInferencer(Inferencer):
|
|||||||
|
|
||||||
dev = DeviceManager.get_instance().getDevice(gpu)
|
dev = DeviceManager.get_instance().getDevice(gpu)
|
||||||
isHalf = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
|
isHalf = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
|
||||||
|
print("LLLLLLLLLLLLLLLOOOOOOOOOOOOOOOOOOOOOOO", file)
|
||||||
cpt = torch.load(file, map_location="cpu")
|
cpt = torch.load(file, map_location="cpu")
|
||||||
model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)
|
model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ import torch
|
|||||||
from onnxsim import simplify
|
from onnxsim import simplify
|
||||||
import onnx
|
import onnx
|
||||||
from const import TMP_DIR, EnumInferenceTypes
|
from const import TMP_DIR, EnumInferenceTypes
|
||||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
from data.ModelSlot import ModelSlot
|
||||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||||
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
|
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
|
||||||
SynthesizerTrnMs256NSFsid_ONNX,
|
SynthesizerTrnMs256NSFsid_ONNX,
|
||||||
@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
|
|||||||
modelFile = modelSlot.modelFile
|
modelFile = modelSlot.modelFile
|
||||||
|
|
||||||
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
|
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
|
||||||
output_file_simple = (
|
output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
|
||||||
os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
|
|
||||||
)
|
|
||||||
output_path = os.path.join(TMP_DIR, output_file)
|
output_path = os.path.join(TMP_DIR, output_file)
|
||||||
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
|
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
|
||||||
metadata = {
|
metadata = {
|
||||||
@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
|
|||||||
if gpuMomory > 0:
|
if gpuMomory > 0:
|
||||||
_export2onnx(modelFile, output_path, output_path_simple, True, metadata)
|
_export2onnx(modelFile, output_path, output_path_simple, True, metadata)
|
||||||
else:
|
else:
|
||||||
print(
|
print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
|
||||||
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
|
|
||||||
)
|
|
||||||
_export2onnx(modelFile, output_path, output_path_simple, False, metadata)
|
_export2onnx(modelFile, output_path, output_path_simple, False, metadata)
|
||||||
return output_file_simple
|
return output_file_simple
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
import faiss
|
import faiss
|
||||||
|
from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
|
||||||
|
|
||||||
from voice_changer.RVC.ModelSlot import ModelSlot
|
|
||||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||||
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
|
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
|
||||||
@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
|
|||||||
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
|
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
|
||||||
|
|
||||||
|
|
||||||
def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
def createPipeline(slotInfo: RVCSlotInfo, gpu: int, f0Detector: str):
|
||||||
dev = DeviceManager.get_instance().getDevice(gpu)
|
dev = DeviceManager.get_instance().getDevice(gpu)
|
||||||
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
|
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
|
||||||
|
|
||||||
# Inferencer 生成
|
# Inferencer 生成
|
||||||
try:
|
try:
|
||||||
inferencer = InferencerManager.getInferencer(
|
inferencer = InferencerManager.getInferencer(slotInfo.modelType, slotInfo.modelFile, gpu)
|
||||||
modelSlot.modelType, modelSlot.modelFile, gpu
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("[Voice Changer] exception! loading inferencer", e)
|
print("[Voice Changer] exception! loading inferencer", e)
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
@ -26,7 +24,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
|||||||
# Embedder 生成
|
# Embedder 生成
|
||||||
try:
|
try:
|
||||||
embedder = EmbedderManager.getEmbedder(
|
embedder = EmbedderManager.getEmbedder(
|
||||||
modelSlot.embedder,
|
slotInfo.embedder,
|
||||||
# emmbedderFilename,
|
# emmbedderFilename,
|
||||||
half,
|
half,
|
||||||
dev,
|
dev,
|
||||||
@ -39,14 +37,14 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
|||||||
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector)
|
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector)
|
||||||
|
|
||||||
# index, feature
|
# index, feature
|
||||||
index = _loadIndex(modelSlot)
|
index = _loadIndex(slotInfo)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
embedder,
|
embedder,
|
||||||
inferencer,
|
inferencer,
|
||||||
pitchExtractor,
|
pitchExtractor,
|
||||||
index,
|
index,
|
||||||
modelSlot.samplingRate,
|
slotInfo.samplingRate,
|
||||||
dev,
|
dev,
|
||||||
half,
|
half,
|
||||||
)
|
)
|
||||||
@ -54,21 +52,21 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
|
|||||||
return pipeline
|
return pipeline
|
||||||
|
|
||||||
|
|
||||||
def _loadIndex(modelSlot: ModelSlot):
|
def _loadIndex(slotInfo: RVCSlotInfo):
|
||||||
# Indexのロード
|
# Indexのロード
|
||||||
print("[Voice Changer] Loading index...")
|
print("[Voice Changer] Loading index...")
|
||||||
# ファイル指定がない場合はNone
|
# ファイル指定がない場合はNone
|
||||||
if modelSlot.indexFile is None:
|
if slotInfo.indexFile is None:
|
||||||
print("[Voice Changer] Index is None, not used")
|
print("[Voice Changer] Index is None, not used")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# ファイル指定があってもファイルがない場合はNone
|
# ファイル指定があってもファイルがない場合はNone
|
||||||
if os.path.exists(modelSlot.indexFile) is not True:
|
if os.path.exists(slotInfo.indexFile) is not True:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print("Try loading...", modelSlot.indexFile)
|
print("Try loading...", slotInfo.indexFile)
|
||||||
index = faiss.read_index(modelSlot.indexFile)
|
index = faiss.read_index(slotInfo.indexFile)
|
||||||
except:
|
except:
|
||||||
print("[Voice Changer] load index failed. Use no index.")
|
print("[Voice Changer] load index failed. Use no index.")
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
@ -28,7 +28,7 @@ class DioPitchExtractor(PitchExtractor):
|
|||||||
f0_floor=f0_min,
|
f0_floor=f0_min,
|
||||||
f0_ceil=f0_max,
|
f0_ceil=f0_max,
|
||||||
channels_in_octave=2,
|
channels_in_octave=2,
|
||||||
frame_period=10,
|
frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
|
||||||
)
|
)
|
||||||
f0 = pyworld.stonemask(audio.astype(np.double), _f0, t, sr)
|
f0 = pyworld.stonemask(audio.astype(np.double), _f0, t, sr)
|
||||||
f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame))
|
f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame))
|
||||||
|
@ -27,7 +27,7 @@ class HarvestPitchExtractor(PitchExtractor):
|
|||||||
audio.astype(np.double),
|
audio.astype(np.double),
|
||||||
fs=sr,
|
fs=sr,
|
||||||
f0_ceil=f0_max,
|
f0_ceil=f0_max,
|
||||||
frame_period=10,
|
frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
|
||||||
)
|
)
|
||||||
f0 = pyworld.stonemask(audio.astype(np.double), f0, t, sr)
|
f0 = pyworld.stonemask(audio.astype(np.double), f0, t, sr)
|
||||||
f0 = signal.medfilt(f0, 3)
|
f0 = signal.medfilt(f0, 3)
|
||||||
|
@ -1,21 +1,21 @@
|
|||||||
from typing import Any, Union, cast
|
from typing import Any, Union, cast
|
||||||
|
|
||||||
import socketio
|
import socketio
|
||||||
from const import TMP_DIR, ModelType
|
from const import TMP_DIR, VoiceChangerType
|
||||||
import torch
|
import torch
|
||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from dataclasses import dataclass, asdict, field
|
from dataclasses import dataclass, asdict, field
|
||||||
import resampy
|
import resampy
|
||||||
|
from data.ModelSlot import loadSlotInfo
|
||||||
|
|
||||||
|
|
||||||
from voice_changer.IORecorder import IORecorder
|
from voice_changer.IORecorder import IORecorder
|
||||||
from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
|
from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
|
||||||
|
|
||||||
from voice_changer.utils.Timer import Timer
|
from voice_changer.utils.Timer import Timer
|
||||||
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
|
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||||
from Exceptions import (
|
from Exceptions import (
|
||||||
DeviceCannotSupportHalfPrecisionException,
|
DeviceCannotSupportHalfPrecisionException,
|
||||||
DeviceChangingException,
|
DeviceChangingException,
|
||||||
@ -26,15 +26,22 @@ from Exceptions import (
|
|||||||
VoiceChangerIsNotSelectedException,
|
VoiceChangerIsNotSelectedException,
|
||||||
)
|
)
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
import threading
|
|
||||||
import time
|
# import threading
|
||||||
import sounddevice as sd
|
# import time
|
||||||
import librosa
|
# import sounddevice as sd
|
||||||
|
# import librosa
|
||||||
|
import json
|
||||||
|
|
||||||
STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
|
STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
|
||||||
STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
|
STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class SlotInfo:
|
||||||
|
voiceChangerType: VoiceChangerType | None = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class VoiceChangerSettings:
|
class VoiceChangerSettings:
|
||||||
inputSampleRate: int = 48000 # 48000 or 24000
|
inputSampleRate: int = 48000 # 48000 or 24000
|
||||||
@ -45,9 +52,7 @@ class VoiceChangerSettings:
|
|||||||
|
|
||||||
recordIO: int = 0 # 0:off, 1:on
|
recordIO: int = 0 # 0:off, 1:on
|
||||||
serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
|
serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
|
||||||
serverAudioOutputDevices: list[ServerAudioDevice] = field(
|
serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
|
||||||
default_factory=lambda: []
|
|
||||||
)
|
|
||||||
|
|
||||||
enableServerAudio: int = 0 # 0:off, 1:on
|
enableServerAudio: int = 0 # 0:off, 1:on
|
||||||
serverAudioStated: int = 0 # 0:off, 1:on
|
serverAudioStated: int = 0 # 0:off, 1:on
|
||||||
@ -93,160 +98,131 @@ class VoiceChangerSettings:
|
|||||||
|
|
||||||
|
|
||||||
class VoiceChanger:
|
class VoiceChanger:
|
||||||
settings: VoiceChangerSettings = VoiceChangerSettings()
|
# settings: VoiceChangerSettings = VoiceChangerSettings()
|
||||||
voiceChanger: VoiceChangerModel | None = None
|
# voiceChangerModel: VoiceChangerModel | None = None
|
||||||
ioRecorder: IORecorder
|
#
|
||||||
sola_buffer: AudioInOut
|
#
|
||||||
namespace: socketio.AsyncNamespace | None = None
|
# namespace: socketio.AsyncNamespace | None = None
|
||||||
|
|
||||||
localPerformanceShowTime = 0.0
|
# localPerformanceShowTime = 0.0
|
||||||
|
|
||||||
emitTo = None
|
# emitTo = None
|
||||||
|
|
||||||
def audio_callback(
|
# def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
|
||||||
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
|
# try:
|
||||||
):
|
# indata = indata * self.settings.serverInputAudioGain
|
||||||
try:
|
# with Timer("all_inference_time") as t:
|
||||||
indata = indata * self.settings.serverInputAudioGain
|
# unpackedData = librosa.to_mono(indata.T) * 32768.0
|
||||||
with Timer("all_inference_time") as t:
|
# out_wav, times = self.on_request(unpackedData)
|
||||||
unpackedData = librosa.to_mono(indata.T) * 32768.0
|
# outputChunnels = outdata.shape[1]
|
||||||
out_wav, times = self.on_request(unpackedData)
|
# outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
|
||||||
outputChunnels = outdata.shape[1]
|
# outdata[:] = outdata * self.settings.serverOutputAudioGain
|
||||||
outdata[:] = (
|
# all_inference_time = t.secs
|
||||||
np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
|
# performance = [all_inference_time] + times
|
||||||
/ 32768.0
|
# if self.emitTo is not None:
|
||||||
)
|
# self.emitTo(performance)
|
||||||
outdata[:] = outdata * self.settings.serverOutputAudioGain
|
# self.settings.performance = [round(x * 1000) for x in performance]
|
||||||
all_inference_time = t.secs
|
# except Exception as e:
|
||||||
performance = [all_inference_time] + times
|
# print("[Voice Changer] ex:", e)
|
||||||
if self.emitTo is not None:
|
|
||||||
self.emitTo(performance)
|
|
||||||
self.settings.performance = [round(x * 1000) for x in performance]
|
|
||||||
except Exception as e:
|
|
||||||
print("[Voice Changer] ex:", e)
|
|
||||||
|
|
||||||
def getServerAudioDevice(
|
# def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
|
||||||
self, audioDeviceList: list[ServerAudioDevice], index: int
|
# serverAudioDevice = [x for x in audioDeviceList if x.index == index]
|
||||||
):
|
# if len(serverAudioDevice) > 0:
|
||||||
serverAudioDevice = [x for x in audioDeviceList if x.index == index]
|
# return serverAudioDevice[0]
|
||||||
if len(serverAudioDevice) > 0:
|
# else:
|
||||||
return serverAudioDevice[0]
|
# return None
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def serverLocal(self, _vc):
|
# def serverLocal(self, _vc):
|
||||||
vc: VoiceChanger = _vc
|
# vc: VoiceChanger = _vc
|
||||||
|
|
||||||
currentInputDeviceId = -1
|
# currentInputDeviceId = -1
|
||||||
currentModelSamplingRate = -1
|
# currentModelSamplingRate = -1
|
||||||
currentOutputDeviceId = -1
|
# currentOutputDeviceId = -1
|
||||||
currentInputChunkNum = -1
|
# currentInputChunkNum = -1
|
||||||
while True:
|
# while True:
|
||||||
if (
|
# if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
|
||||||
vc.settings.serverAudioStated == 0
|
# vc.settings.inputSampleRate = 48000
|
||||||
or vc.settings.serverInputDeviceId == -1
|
# time.sleep(2)
|
||||||
or vc.voiceChanger is None
|
# else:
|
||||||
):
|
# sd._terminate()
|
||||||
vc.settings.inputSampleRate = 48000
|
# sd._initialize()
|
||||||
time.sleep(2)
|
|
||||||
else:
|
|
||||||
sd._terminate()
|
|
||||||
sd._initialize()
|
|
||||||
|
|
||||||
sd.default.device[0] = vc.settings.serverInputDeviceId
|
# sd.default.device[0] = vc.settings.serverInputDeviceId
|
||||||
currentInputDeviceId = vc.settings.serverInputDeviceId
|
# currentInputDeviceId = vc.settings.serverInputDeviceId
|
||||||
sd.default.device[1] = vc.settings.serverOutputDeviceId
|
# sd.default.device[1] = vc.settings.serverOutputDeviceId
|
||||||
currentOutputDeviceId = vc.settings.serverOutputDeviceId
|
# currentOutputDeviceId = vc.settings.serverOutputDeviceId
|
||||||
|
|
||||||
currentInputChannelNum = vc.settings.serverAudioInputDevices
|
# currentInputChannelNum = vc.settings.serverAudioInputDevices
|
||||||
|
|
||||||
serverInputAudioDevice = self.getServerAudioDevice(
|
# serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
|
||||||
vc.settings.serverAudioInputDevices, currentInputDeviceId
|
# serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
|
||||||
)
|
# print(serverInputAudioDevice, serverOutputAudioDevice)
|
||||||
serverOutputAudioDevice = self.getServerAudioDevice(
|
# if serverInputAudioDevice is None or serverOutputAudioDevice is None:
|
||||||
vc.settings.serverAudioOutputDevices, currentOutputDeviceId
|
# time.sleep(2)
|
||||||
)
|
# print("serverInputAudioDevice or serverOutputAudioDevice is None")
|
||||||
print(serverInputAudioDevice, serverOutputAudioDevice)
|
# continue
|
||||||
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
|
|
||||||
time.sleep(2)
|
|
||||||
print("serverInputAudioDevice or serverOutputAudioDevice is None")
|
|
||||||
continue
|
|
||||||
|
|
||||||
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
|
# currentInputChannelNum = serverInputAudioDevice.maxInputChannels
|
||||||
currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
|
# currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
|
||||||
|
|
||||||
currentInputChunkNum = vc.settings.serverReadChunkSize
|
# currentInputChunkNum = vc.settings.serverReadChunkSize
|
||||||
block_frame = currentInputChunkNum * 128
|
# block_frame = currentInputChunkNum * 128
|
||||||
|
|
||||||
# sample rate precheck(alsa cannot use 40000?)
|
# # sample rate precheck(alsa cannot use 40000?)
|
||||||
try:
|
# try:
|
||||||
currentModelSamplingRate = (
|
# currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
|
||||||
self.voiceChanger.get_processing_sampling_rate()
|
# except Exception as e:
|
||||||
)
|
# print("[Voice Changer] ex: get_processing_sampling_rate", e)
|
||||||
except Exception as e:
|
# continue
|
||||||
print("[Voice Changer] ex: get_processing_sampling_rate", e)
|
# try:
|
||||||
continue
|
# with sd.Stream(
|
||||||
try:
|
# callback=self.audio_callback,
|
||||||
with sd.Stream(
|
# blocksize=block_frame,
|
||||||
callback=self.audio_callback,
|
# samplerate=currentModelSamplingRate,
|
||||||
blocksize=block_frame,
|
# dtype="float32",
|
||||||
samplerate=currentModelSamplingRate,
|
# channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||||
dtype="float32",
|
# ):
|
||||||
channels=[currentInputChannelNum, currentOutputChannelNum],
|
# pass
|
||||||
):
|
# vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
|
||||||
pass
|
# vc.settings.inputSampleRate = currentModelSamplingRate
|
||||||
vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
|
# print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
|
||||||
vc.settings.inputSampleRate = currentModelSamplingRate
|
# except Exception as e:
|
||||||
print(
|
# print(
|
||||||
f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}"
|
# "[Voice Changer] ex: fallback to device default samplerate",
|
||||||
)
|
# e,
|
||||||
except Exception as e:
|
# )
|
||||||
print(
|
# vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
|
||||||
"[Voice Changer] ex: fallback to device default samplerate",
|
# vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
|
||||||
e,
|
|
||||||
)
|
|
||||||
vc.settings.serverInputAudioSampleRate = (
|
|
||||||
serverInputAudioDevice.default_samplerate
|
|
||||||
)
|
|
||||||
vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
|
|
||||||
|
|
||||||
# main loop
|
# # main loop
|
||||||
try:
|
# try:
|
||||||
with sd.Stream(
|
# with sd.Stream(
|
||||||
callback=self.audio_callback,
|
# callback=self.audio_callback,
|
||||||
blocksize=block_frame,
|
# blocksize=block_frame,
|
||||||
samplerate=vc.settings.serverInputAudioSampleRate,
|
# samplerate=vc.settings.serverInputAudioSampleRate,
|
||||||
dtype="float32",
|
# dtype="float32",
|
||||||
channels=[currentInputChannelNum, currentOutputChannelNum],
|
# channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||||
):
|
# ):
|
||||||
while (
|
# while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
|
||||||
vc.settings.serverAudioStated == 1
|
# time.sleep(2)
|
||||||
and currentInputDeviceId == vc.settings.serverInputDeviceId
|
# print(
|
||||||
and currentOutputDeviceId
|
# "[Voice Changer] server audio",
|
||||||
== vc.settings.serverOutputDeviceId
|
# self.settings.performance,
|
||||||
and currentModelSamplingRate
|
# )
|
||||||
== self.voiceChanger.get_processing_sampling_rate()
|
# print(
|
||||||
and currentInputChunkNum == vc.settings.serverReadChunkSize
|
# "[Voice Changer] info:",
|
||||||
):
|
# vc.settings.serverAudioStated,
|
||||||
time.sleep(2)
|
# currentInputDeviceId,
|
||||||
print(
|
# currentOutputDeviceId,
|
||||||
"[Voice Changer] server audio",
|
# vc.settings.serverInputAudioSampleRate,
|
||||||
self.settings.performance,
|
# currentInputChunkNum,
|
||||||
)
|
# )
|
||||||
print(
|
|
||||||
"[Voice Changer] info:",
|
|
||||||
vc.settings.serverAudioStated,
|
|
||||||
currentInputDeviceId,
|
|
||||||
currentOutputDeviceId,
|
|
||||||
vc.settings.serverInputAudioSampleRate,
|
|
||||||
currentInputChunkNum,
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
print("[Voice Changer] ex:", e)
|
# print("[Voice Changer] ex:", e)
|
||||||
time.sleep(2)
|
# time.sleep(2)
|
||||||
|
|
||||||
def __init__(self, params: VoiceChangerParams):
|
def __init__(self, params: VoiceChangerParams, slotIndex: int):
|
||||||
# 初期化
|
# 初期化
|
||||||
self.settings = VoiceChangerSettings()
|
self.settings = VoiceChangerSettings()
|
||||||
self.onnx_session = None
|
self.onnx_session = None
|
||||||
@ -255,147 +231,80 @@ class VoiceChanger:
|
|||||||
self.currentCrossFadeOverlapSize = 0 # setting
|
self.currentCrossFadeOverlapSize = 0 # setting
|
||||||
self.crossfadeSize = 0 # calculated
|
self.crossfadeSize = 0 # calculated
|
||||||
|
|
||||||
self.voiceChanger = None
|
self.voiceChangerModel = None
|
||||||
self.modelType: ModelType | None = None
|
self.modelType: VoiceChangerType | None = None
|
||||||
self.params = params
|
self.params = params
|
||||||
self.gpu_num = torch.cuda.device_count()
|
|
||||||
self.prev_audio = np.zeros(4096)
|
self.prev_audio = np.zeros(4096)
|
||||||
self.mps_enabled: bool = (
|
self.ioRecorder: IORecorder | None = None
|
||||||
getattr(torch.backends, "mps", None) is not None
|
self.sola_buffer: AudioInOut | None = None
|
||||||
and torch.backends.mps.is_available()
|
|
||||||
)
|
|
||||||
|
|
||||||
audioinput, audiooutput = list_audio_device()
|
audioinput, audiooutput = list_audio_device()
|
||||||
self.settings.serverAudioInputDevices = audioinput
|
self.settings.serverAudioInputDevices = audioinput
|
||||||
self.settings.serverAudioOutputDevices = audiooutput
|
self.settings.serverAudioOutputDevices = audiooutput
|
||||||
|
|
||||||
thread = threading.Thread(target=self.serverLocal, args=(self,))
|
self.slotIndex = slotIndex
|
||||||
thread.start()
|
self.slotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
|
||||||
print(
|
if self.slotInfo.voiceChangerType is None:
|
||||||
f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
|
print(f"[Voice Changer] Voice Changer Type is None for slot {slotIndex} is not found.")
|
||||||
)
|
return
|
||||||
|
elif self.slotInfo.voiceChangerType == "RVC":
|
||||||
|
from voice_changer.RVC.RVC import RVC
|
||||||
|
|
||||||
def switchModelType(self, modelType: ModelType):
|
self.voiceChangerModel = RVC(self.slotIndex, self.params)
|
||||||
try:
|
|
||||||
if self.voiceChanger is not None:
|
|
||||||
# return {"status": "ERROR", "msg": "vc is already selected. currently re-select is not implemented"}
|
|
||||||
del self.voiceChanger
|
|
||||||
self.voiceChanger = None
|
|
||||||
|
|
||||||
self.modelType = modelType
|
|
||||||
if self.modelType == "MMVCv15":
|
|
||||||
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
|
|
||||||
|
|
||||||
self.voiceChanger = MMVCv15() # type: ignore
|
|
||||||
elif self.modelType == "MMVCv13":
|
|
||||||
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
|
||||||
|
|
||||||
self.voiceChanger = MMVCv13()
|
|
||||||
elif self.modelType == "so-vits-svc-40v2":
|
|
||||||
from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
|
|
||||||
|
|
||||||
self.voiceChanger = SoVitsSvc40v2(self.params)
|
|
||||||
elif (
|
|
||||||
self.modelType == "so-vits-svc-40"
|
|
||||||
or self.modelType == "so-vits-svc-40_c"
|
|
||||||
):
|
|
||||||
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
|
|
||||||
|
|
||||||
self.voiceChanger = SoVitsSvc40(self.params)
|
|
||||||
elif self.modelType == "DDSP-SVC":
|
|
||||||
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
|
|
||||||
|
|
||||||
self.voiceChanger = DDSP_SVC(self.params)
|
|
||||||
elif self.modelType == "RVC":
|
|
||||||
from voice_changer.RVC.RVC import RVC
|
|
||||||
|
|
||||||
self.voiceChanger = RVC(self.params)
|
|
||||||
else:
|
|
||||||
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
|
||||||
|
|
||||||
self.voiceChanger = MMVCv13()
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
print(traceback.format_exc())
|
|
||||||
return {"status": "OK", "msg": "vc is switched."}
|
|
||||||
|
|
||||||
def getModelType(self):
|
|
||||||
if self.modelType is not None:
|
|
||||||
return {"status": "OK", "vc": self.modelType}
|
|
||||||
else:
|
else:
|
||||||
return {"status": "OK", "vc": "none"}
|
print(f"[Voice Changer] unknwon voice changer type. {self.slotInfo.voiceChangerType}")
|
||||||
|
|
||||||
def loadModel(self, props: LoadModelParams):
|
# thread = threading.Thread(target=self.serverLocal, args=(self,))
|
||||||
try:
|
# thread.start()
|
||||||
if self.voiceChanger is None:
|
|
||||||
raise VoiceChangerIsNotSelectedException(
|
def prepareModel(self):
|
||||||
"Voice Changer is not selected."
|
self.voiceChangerModel.prepareModel()
|
||||||
)
|
|
||||||
return self.voiceChanger.loadModel(props)
|
|
||||||
except Exception as e:
|
|
||||||
print(traceback.format_exc())
|
|
||||||
print("[Voice Changer] Model Load Error! Check your model is valid.", e)
|
|
||||||
return {"status": "NG"}
|
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
data = asdict(self.settings)
|
data = asdict(self.settings)
|
||||||
if self.voiceChanger is not None:
|
if self.voiceChangerModel is not None:
|
||||||
data.update(self.voiceChanger.get_info())
|
data.update(self.voiceChangerModel.get_info())
|
||||||
|
|
||||||
devCount = torch.cuda.device_count()
|
|
||||||
gpus = []
|
|
||||||
for id in range(devCount):
|
|
||||||
name = torch.cuda.get_device_name(id)
|
|
||||||
memory = torch.cuda.get_device_properties(id).total_memory
|
|
||||||
gpu = {"id": id, "name": name, "memory": memory}
|
|
||||||
gpus.append(gpu)
|
|
||||||
data["gpus"] = gpus
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_performance(self):
|
def get_performance(self):
|
||||||
return self.settings.performance
|
return self.settings.performance
|
||||||
|
|
||||||
def update_settings(self, key: str, val: Any):
|
def update_settings(self, key: str, val: Any):
|
||||||
if self.voiceChanger is None:
|
if self.voiceChangerModel is None:
|
||||||
print("[Voice Changer] Voice Changer is not selected.")
|
print("[Voice Changer] Voice Changer is not selected.")
|
||||||
return self.get_info()
|
return
|
||||||
|
|
||||||
if key in self.settings.intData:
|
if key in self.settings.intData:
|
||||||
setattr(self.settings, key, int(val))
|
setattr(self.settings, key, int(val))
|
||||||
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
|
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
|
||||||
self.crossfadeSize = 0
|
self.crossfadeSize = 0
|
||||||
if key == "recordIO" and val == 1:
|
if key == "recordIO" and val == 1:
|
||||||
if hasattr(self, "ioRecorder"):
|
if self.ioRecorder is not None:
|
||||||
self.ioRecorder.close()
|
self.ioRecorder.close()
|
||||||
self.ioRecorder = IORecorder(
|
self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
|
||||||
STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
|
|
||||||
)
|
|
||||||
if key == "recordIO" and val == 0:
|
if key == "recordIO" and val == 0:
|
||||||
if hasattr(self, "ioRecorder"):
|
if self.ioRecorder is not None:
|
||||||
self.ioRecorder.close()
|
self.ioRecorder.close()
|
||||||
|
self.ioRecorder = None
|
||||||
pass
|
pass
|
||||||
if key == "recordIO" and val == 2:
|
if key == "recordIO" and val == 2:
|
||||||
if hasattr(self, "ioRecorder"):
|
if self.ioRecorder is not None:
|
||||||
self.ioRecorder.close()
|
self.ioRecorder.close()
|
||||||
|
self.ioRecorder = None
|
||||||
|
|
||||||
elif key in self.settings.floatData:
|
elif key in self.settings.floatData:
|
||||||
setattr(self.settings, key, float(val))
|
setattr(self.settings, key, float(val))
|
||||||
elif key in self.settings.strData:
|
elif key in self.settings.strData:
|
||||||
setattr(self.settings, key, str(val))
|
setattr(self.settings, key, str(val))
|
||||||
else:
|
else:
|
||||||
ret = self.voiceChanger.update_settings(key, val)
|
ret = self.voiceChangerModel.update_settings(key, val)
|
||||||
if ret is False:
|
if ret is False:
|
||||||
pass
|
pass
|
||||||
# print(f"({key} is not mutable variable or unknown variable)")
|
# print(f"({key} is not mutable variable or unknown variable)")
|
||||||
return self.get_info()
|
|
||||||
|
|
||||||
def _generate_strength(self, crossfadeSize: int):
|
def _generate_strength(self, crossfadeSize: int):
|
||||||
if (
|
if self.crossfadeSize != crossfadeSize or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
|
||||||
self.crossfadeSize != crossfadeSize
|
|
||||||
or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
|
|
||||||
or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
|
|
||||||
or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
|
|
||||||
):
|
|
||||||
self.crossfadeSize = crossfadeSize
|
self.crossfadeSize = crossfadeSize
|
||||||
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
|
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
|
||||||
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
|
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
|
||||||
@ -424,32 +333,25 @@ class VoiceChanger:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
print(
|
print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")
|
||||||
f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# ひとつ前の結果とサイズが変わるため、記録は消去する。
|
# ひとつ前の結果とサイズが変わるため、記録は消去する。
|
||||||
if hasattr(self, "np_prev_audio1") is True:
|
if hasattr(self, "np_prev_audio1") is True:
|
||||||
delattr(self, "np_prev_audio1")
|
delattr(self, "np_prev_audio1")
|
||||||
if hasattr(self, "sola_buffer") is True:
|
if self.sola_buffer is not None:
|
||||||
del self.sola_buffer
|
del self.sola_buffer
|
||||||
|
self.sola_buffer = None
|
||||||
|
|
||||||
# receivedData: tuple of short
|
# receivedData: tuple of short
|
||||||
def on_request(
|
def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
|
||||||
self, receivedData: AudioInOut
|
|
||||||
) -> tuple[AudioInOut, list[Union[int, float]]]:
|
|
||||||
return self.on_request_sola(receivedData)
|
return self.on_request_sola(receivedData)
|
||||||
|
|
||||||
def on_request_sola(
|
def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
|
||||||
self, receivedData: AudioInOut
|
|
||||||
) -> tuple[AudioInOut, list[Union[int, float]]]:
|
|
||||||
try:
|
try:
|
||||||
if self.voiceChanger is None:
|
if self.voiceChangerModel is None:
|
||||||
raise VoiceChangerIsNotSelectedException(
|
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
|
||||||
"Voice Changer is not selected."
|
|
||||||
)
|
|
||||||
|
|
||||||
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
|
processing_sampling_rate = self.voiceChangerModel.get_processing_sampling_rate()
|
||||||
# 前処理
|
# 前処理
|
||||||
with Timer("pre-process") as t:
|
with Timer("pre-process") as t:
|
||||||
if self.settings.inputSampleRate != processing_sampling_rate:
|
if self.settings.inputSampleRate != processing_sampling_rate:
|
||||||
@ -470,21 +372,17 @@ class VoiceChanger:
|
|||||||
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
|
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
|
||||||
self._generate_strength(crossfade_frame)
|
self._generate_strength(crossfade_frame)
|
||||||
|
|
||||||
data = self.voiceChanger.generate_input(
|
data = self.voiceChangerModel.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
|
||||||
newData, block_frame, crossfade_frame, sola_search_frame
|
|
||||||
)
|
|
||||||
preprocess_time = t.secs
|
preprocess_time = t.secs
|
||||||
|
|
||||||
# 変換処理
|
# 変換処理
|
||||||
with Timer("main-process") as t:
|
with Timer("main-process") as t:
|
||||||
# Inference
|
# Inference
|
||||||
audio = self.voiceChanger.inference(data)
|
audio = self.voiceChangerModel.inference(data)
|
||||||
|
|
||||||
if hasattr(self, "sola_buffer") is True:
|
if self.sola_buffer is not None:
|
||||||
np.set_printoptions(threshold=10000)
|
np.set_printoptions(threshold=10000)
|
||||||
audio_offset = -1 * (
|
audio_offset = -1 * (sola_search_frame + crossfade_frame + block_frame)
|
||||||
sola_search_frame + crossfade_frame + block_frame
|
|
||||||
)
|
|
||||||
audio = audio[audio_offset:]
|
audio = audio[audio_offset:]
|
||||||
|
|
||||||
# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
|
# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
|
||||||
@ -512,10 +410,7 @@ class VoiceChanger:
|
|||||||
print("[Voice Changer] warming up... generating sola buffer.")
|
print("[Voice Changer] warming up... generating sola buffer.")
|
||||||
result = np.zeros(4096).astype(np.int16)
|
result = np.zeros(4096).astype(np.int16)
|
||||||
|
|
||||||
if (
|
if self.sola_buffer is not None and sola_offset < sola_search_frame:
|
||||||
hasattr(self, "sola_buffer") is True
|
|
||||||
and sola_offset < sola_search_frame
|
|
||||||
):
|
|
||||||
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
|
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
|
||||||
end = -1 * (sola_search_frame - sola_offset)
|
end = -1 * (sola_search_frame - sola_offset)
|
||||||
sola_buf_org = audio[offset:end]
|
sola_buf_org = audio[offset:end]
|
||||||
@ -545,9 +440,7 @@ class VoiceChanger:
|
|||||||
else:
|
else:
|
||||||
outputData = result
|
outputData = result
|
||||||
|
|
||||||
print_convert_processing(
|
print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
|
||||||
f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
|
|
||||||
)
|
|
||||||
|
|
||||||
if receivedData.shape[0] != outputData.shape[0]:
|
if receivedData.shape[0] != outputData.shape[0]:
|
||||||
# print(
|
# print(
|
||||||
@ -564,9 +457,7 @@ class VoiceChanger:
|
|||||||
|
|
||||||
postprocess_time = t.secs
|
postprocess_time = t.secs
|
||||||
|
|
||||||
print_convert_processing(
|
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
|
||||||
f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
|
|
||||||
)
|
|
||||||
perf = [preprocess_time, mainprocess_time, postprocess_time]
|
perf = [preprocess_time, mainprocess_time, postprocess_time]
|
||||||
return outputData, perf
|
return outputData, perf
|
||||||
|
|
||||||
@ -586,9 +477,7 @@ class VoiceChanger:
|
|||||||
print("[Voice Changer] embedder:", e)
|
print("[Voice Changer] embedder:", e)
|
||||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||||
except VoiceChangerIsNotSelectedException:
|
except VoiceChangerIsNotSelectedException:
|
||||||
print(
|
print("[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc.")
|
||||||
"[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc."
|
|
||||||
)
|
|
||||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||||
except DeviceCannotSupportHalfPrecisionException:
|
except DeviceCannotSupportHalfPrecisionException:
|
||||||
# RVC.pyでfallback処理をするので、ここはダミーデータ返すだけ。
|
# RVC.pyでfallback処理をするので、ここはダミーデータ返すだけ。
|
||||||
|
@ -1,35 +1,105 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
import threading
|
||||||
|
from data.ModelSample import ModelSamples
|
||||||
|
from data.ModelSlot import ModelSlots, loadSlotInfo
|
||||||
|
from utils.downloader.SampleDownloader import downloadSample, getSampleInfos
|
||||||
|
from voice_changer.Local.ServerDevice import ServerDevice
|
||||||
|
from voice_changer.RVC.ModelSlotGenerator import setSlotAsRVC
|
||||||
|
|
||||||
from voice_changer.VoiceChanger import VoiceChanger
|
from voice_changer.VoiceChanger import VoiceChanger
|
||||||
from const import ModelType
|
from const import MAX_SLOT_NUM, VoiceChangerType
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
|
||||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
|
|
||||||
|
from dataclasses import dataclass, asdict, field
|
||||||
|
import torch
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class GPUInfo:
|
||||||
|
id: int
|
||||||
|
name: str
|
||||||
|
memory: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class VoiceChangerManagerSettings:
|
||||||
|
slotIndex: int
|
||||||
|
intData: list[str] = field(default_factory=lambda: ["slotIndex"])
|
||||||
|
|
||||||
|
|
||||||
class VoiceChangerManager(object):
|
class VoiceChangerManager(object):
|
||||||
_instance = None
|
_instance = None
|
||||||
voiceChanger: VoiceChanger = None
|
|
||||||
|
def __init__(self, params: VoiceChangerParams):
|
||||||
|
self.voiceChanger: VoiceChanger = None
|
||||||
|
self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings(slotIndex=0)
|
||||||
|
self.params: VoiceChangerParams = params
|
||||||
|
|
||||||
|
self.serverDevice = ServerDevice()
|
||||||
|
|
||||||
|
# スタティックな情報を収集
|
||||||
|
self.sampleModels: list[ModelSamples] = getSampleInfos(self.params.sample_mode)
|
||||||
|
self.gpus: list[GPUInfo] = self._get_gpuInfos()
|
||||||
|
|
||||||
|
def _get_gpuInfos(self):
|
||||||
|
devCount = torch.cuda.device_count()
|
||||||
|
gpus = []
|
||||||
|
for id in range(devCount):
|
||||||
|
name = torch.cuda.get_device_name(id)
|
||||||
|
memory = torch.cuda.get_device_properties(id).total_memory
|
||||||
|
gpu = {"id": id, "name": name, "memory": memory}
|
||||||
|
gpus.append(gpu)
|
||||||
|
return gpus
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_instance(cls, params: VoiceChangerParams):
|
def get_instance(cls, params: VoiceChangerParams):
|
||||||
if cls._instance is None:
|
if cls._instance is None:
|
||||||
cls._instance = cls()
|
cls._instance = cls(params)
|
||||||
cls._instance.voiceChanger = VoiceChanger(params)
|
|
||||||
|
gpu_num = torch.cuda.device_count()
|
||||||
|
mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
||||||
|
print(f"VoiceChanger Initialized (GPU_NUM:{gpu_num}, mps_enabled:{mps_enabled})")
|
||||||
|
|
||||||
|
cls._instance.voiceChanger = VoiceChanger(params, cls._instance.settings.slotIndex)
|
||||||
|
thread = threading.Thread(target=cls._instance.serverDevice.serverLocal, args=(cls._instance.voiceChanger,))
|
||||||
|
thread.start()
|
||||||
|
cls._instance.voiceChanger.prepareModel()
|
||||||
return cls._instance
|
return cls._instance
|
||||||
|
|
||||||
def loadModel(self, props: LoadModelParams):
|
def loadModel(self, slot: int, voiceChangerType: VoiceChangerType, params: str):
|
||||||
info = self.voiceChanger.loadModel(props)
|
print(slot, voiceChangerType, params)
|
||||||
if hasattr(info, "status") and info["status"] == "NG":
|
paramDict = json.loads(params)
|
||||||
return info
|
if voiceChangerType == "RVC":
|
||||||
else:
|
if "sampleId" in paramDict and len(paramDict["sampleId"]) > 0:
|
||||||
info["status"] = "OK"
|
print("[Voice Canger]: Download RVC sample.")
|
||||||
return info
|
downloadSample(self.params.sample_mode, paramDict["sampleId"], self.params.model_dir, slot, {"useIndex": paramDict["rvcIndexDownload"]})
|
||||||
|
else:
|
||||||
|
print("[Voice Canger]: Set uploaded RVC model to slot.")
|
||||||
|
setSlotAsRVC(self.params.model_dir, slot, paramDict)
|
||||||
|
|
||||||
|
return self.get_info()
|
||||||
|
|
||||||
|
def get_slotInfos(self):
|
||||||
|
slotInfos: list[ModelSlots] = []
|
||||||
|
for slotIndex in range(MAX_SLOT_NUM):
|
||||||
|
slotInfo = loadSlotInfo(self.params.model_dir, slotIndex)
|
||||||
|
slotInfos.append(slotInfo)
|
||||||
|
return slotInfos
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
|
data = asdict(self.settings)
|
||||||
|
slotInfos = self.get_slotInfos()
|
||||||
|
data["slotInfos"] = slotInfos
|
||||||
|
data["gpus"] = self.gpus
|
||||||
|
data["sampleModels"] = self.sampleModels
|
||||||
|
|
||||||
|
data["status"] = "OK"
|
||||||
if hasattr(self, "voiceChanger"):
|
if hasattr(self, "voiceChanger"):
|
||||||
info = self.voiceChanger.get_info()
|
info = self.voiceChanger.get_info()
|
||||||
info["status"] = "OK"
|
data.update(info)
|
||||||
return info
|
return data
|
||||||
else:
|
else:
|
||||||
return {"status": "ERROR", "msg": "no model loaded"}
|
return {"status": "ERROR", "msg": "no model loaded"}
|
||||||
|
|
||||||
@ -41,12 +111,22 @@ class VoiceChangerManager(object):
|
|||||||
return {"status": "ERROR", "msg": "no model loaded"}
|
return {"status": "ERROR", "msg": "no model loaded"}
|
||||||
|
|
||||||
def update_settings(self, key: str, val: str | int | float):
|
def update_settings(self, key: str, val: str | int | float):
|
||||||
if hasattr(self, "voiceChanger"):
|
if key in self.settings.intData:
|
||||||
info = self.voiceChanger.update_settings(key, val)
|
setattr(self.settings, key, int(val))
|
||||||
info["status"] = "OK"
|
if key == "slotIndex":
|
||||||
return info
|
val = val % 1000 # Quick hack for same slot is selected
|
||||||
|
setattr(self.settings, key, int(val))
|
||||||
|
|
||||||
|
newVoiceChanger = VoiceChanger(self.params, self.settings.slotIndex)
|
||||||
|
newVoiceChanger.prepareModel()
|
||||||
|
self.serverDevice.serverLocal(newVoiceChanger)
|
||||||
|
del self.voiceChanger
|
||||||
|
self.voiceChanger = newVoiceChanger
|
||||||
|
elif hasattr(self, "voiceChanger"):
|
||||||
|
self.voiceChanger.update_settings(key, val)
|
||||||
else:
|
else:
|
||||||
return {"status": "ERROR", "msg": "no model loaded"}
|
print(f"[Voice Changer] update is not handled. ({key}:{val})")
|
||||||
|
return self.get_info()
|
||||||
|
|
||||||
def changeVoice(self, receivedData: AudioInOut):
|
def changeVoice(self, receivedData: AudioInOut):
|
||||||
if hasattr(self, "voiceChanger") is True:
|
if hasattr(self, "voiceChanger") is True:
|
||||||
@ -55,12 +135,6 @@ class VoiceChangerManager(object):
|
|||||||
print("Voice Change is not loaded. Did you load a correct model?")
|
print("Voice Change is not loaded. Did you load a correct model?")
|
||||||
return np.zeros(1).astype(np.int16), []
|
return np.zeros(1).astype(np.int16), []
|
||||||
|
|
||||||
def switchModelType(self, modelType: ModelType):
|
|
||||||
return self.voiceChanger.switchModelType(modelType)
|
|
||||||
|
|
||||||
def getModelType(self):
|
|
||||||
return self.voiceChanger.getModelType()
|
|
||||||
|
|
||||||
def export2onnx(self):
|
def export2onnx(self):
|
||||||
return self.voiceChanger.export2onnx()
|
return self.voiceChanger.export2onnx()
|
||||||
|
|
||||||
|
@ -10,10 +10,3 @@ class FilePaths:
|
|||||||
clusterTorchModelFilename: str | None
|
clusterTorchModelFilename: str | None
|
||||||
featureFilename: str | None
|
featureFilename: str | None
|
||||||
indexFilename: str | None
|
indexFilename: str | None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class LoadModelParams:
|
|
||||||
slot: int
|
|
||||||
isHalf: bool
|
|
||||||
params: Any
|
|
||||||
|
@ -1,17 +1,11 @@
|
|||||||
from typing import Any, Protocol, TypeAlias
|
from typing import Any, Protocol, TypeAlias
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
|
||||||
|
|
||||||
|
|
||||||
AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
|
AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
|
||||||
|
|
||||||
|
|
||||||
class VoiceChangerModel(Protocol):
|
class VoiceChangerModel(Protocol):
|
||||||
# loadModel: Callable[..., dict[str, Any]]
|
|
||||||
def loadModel(self, params: LoadModelParams):
|
|
||||||
...
|
|
||||||
|
|
||||||
def get_processing_sampling_rate(self) -> int:
|
def get_processing_sampling_rate(self) -> int:
|
||||||
...
|
...
|
||||||
|
|
||||||
@ -21,9 +15,7 @@ class VoiceChangerModel(Protocol):
|
|||||||
def inference(self, data: tuple[Any, ...]) -> Any:
|
def inference(self, data: tuple[Any, ...]) -> Any:
|
||||||
...
|
...
|
||||||
|
|
||||||
def generate_input(
|
def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]:
|
||||||
self, newData: AudioInOut, inputSize: int, crossfadeSize: int
|
|
||||||
) -> tuple[Any, ...]:
|
|
||||||
...
|
...
|
||||||
|
|
||||||
def update_settings(self, key: str, val: Any) -> bool:
|
def update_settings(self, key: str, val: Any) -> bool:
|
||||||
|
@ -11,4 +11,4 @@ class VoiceChangerParams:
|
|||||||
hubert_base_jp: str
|
hubert_base_jp: str
|
||||||
hubert_soft: str
|
hubert_soft: str
|
||||||
nsf_hifigan: str
|
nsf_hifigan: str
|
||||||
rvc_sample_mode: str
|
sample_mode: str
|
||||||
|
Loading…
Reference in New Issue
Block a user