WIP: refactoring

This commit is contained in:
wataru 2023-06-16 00:56:18 +09:00
parent 328ea46161
commit be42bb682d
53 changed files with 3257 additions and 1687 deletions

3
.gitignore vendored
View File

@ -43,12 +43,15 @@ docker/cudnn/
server/pretrain/ server/pretrain/
server/weights/ server/weights/
server/model_dir/ server/model_dir/
server/model_dir2/
server/weights_/ server/weights_/
server/weights__/ server/weights__/
server/models/ server/models/
server/samples.json server/samples.json
server/samples_0003_t.json server/samples_0003_t.json
server/samples_0003_o.json server/samples_0003_o.json
server/samples_0003_o2.json
server/samples_0003_t2.json
server/test_official_v1_v2.json server/test_official_v1_v2.json
server/test_ddpn_v1_v2.json server/test_ddpn_v1_v2.json

View File

@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html> <!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

View File

@ -27,7 +27,6 @@ export const useMessageBuilder_old = (): MessageBuilderStateAndMethod => {
lang = "en" lang = "en"
} }
console.log(messagesRef.current)
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message" return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
} }
return { return {

View File

@ -39,7 +39,7 @@ export const ModelSlotManagerDialog = () => {
if (mode != "localFile") { if (mode != "localFile") {
return <></> return <></>
} }
if (!serverSetting.serverSetting.modelSlots) { if (!serverSetting.serverSetting.slotInfos) {
return <></> return <></>
} }
@ -114,7 +114,7 @@ export const ModelSlotManagerDialog = () => {
} }
const slots = serverSetting.serverSetting.modelSlots.map((x, index) => { const slots = serverSetting.serverSetting.slotInfos.map((x, index) => {
let modelFileName = "" let modelFileName = ""
if (uploadData?.slot == index) { if (uploadData?.slot == index) {
modelFileName = (uploadData.model?.name || "").replace(/^.*[\\\/]/, '') modelFileName = (uploadData.model?.name || "").replace(/^.*[\\\/]/, '')
@ -248,7 +248,7 @@ export const ModelSlotManagerDialog = () => {
}, [ }, [
mode, mode,
serverSetting.serverSetting.modelSlots, serverSetting.serverSetting.slotInfos,
serverSetting.fileUploadSettings, serverSetting.fileUploadSettings,
serverSetting.uploadProgress, serverSetting.uploadProgress,
serverSetting.setFileUploadSetting, serverSetting.setFileUploadSetting,

View File

@ -13,19 +13,22 @@ export const MergeLabDialog = () => {
// スロットが変更されたときの初期化処理 // スロットが変更されたときの初期化処理
const newSlotChangeKey = useMemo(() => { const newSlotChangeKey = useMemo(() => {
if (!serverSetting.serverSetting.modelSlots) { if (!serverSetting.serverSetting.slotInfos) {
return "" return ""
} }
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => { return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
return prev + "_" + cur.modelFile return prev + "_" + cur.modelFile
}, "") }, "")
}, [serverSetting.serverSetting.modelSlots]) }, [serverSetting.serverSetting.slotInfos])
const filterItems = useMemo(() => { const filterItems = useMemo(() => {
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => { return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
const key = `${cur.modelType},${cur.samplingRate},${cur.embChannels}` const key = `${cur.modelType},${cur.samplingRate},${cur.embChannels}`
const val = { type: cur.modelType, samplingRate: cur.samplingRate, embChannels: cur.embChannels } const val = { type: cur.modelType, samplingRate: cur.samplingRate, embChannels: cur.embChannels }
const existKeys = Object.keys(prev) const existKeys = Object.keys(prev)
if (cur.voiceChangerType == null) {
return prev
}
if (cur.modelFile.length == 0) { if (cur.modelFile.length == 0) {
return prev return prev
} }
@ -41,7 +44,7 @@ export const MergeLabDialog = () => {
}, [newSlotChangeKey]) }, [newSlotChangeKey])
const models = useMemo(() => { const models = useMemo(() => {
return serverSetting.serverSetting.modelSlots.filter(x => { return serverSetting.serverSetting.slotInfos.filter(x => {
const filterVals = filterItems[currentFilter] const filterVals = filterItems[currentFilter]
if (!filterVals) { if (!filterVals) {
return false return false

View File

@ -9,11 +9,11 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
const appState = useAppState() const appState = useAppState()
const modelSwitchRow = useMemo(() => { const modelSwitchRow = useMemo(() => {
const slot = appState.serverSetting.serverSetting.modelSlotIndex const slot = appState.serverSetting.serverSetting.slotIndex
const onSwitchModelClicked = async (slot: number) => { const onSwitchModelClicked = async (slot: number) => {
// Quick hack for same slot is selected. 下桁が実際のSlotID // Quick hack for same slot is selected. 下桁が実際のSlotID
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + slot const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + slot
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex }) await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
setTimeout(() => { // quick hack setTimeout(() => { // quick hack
appState.getInfo() appState.getInfo()
}, 1000 * 2) }, 1000 * 2)
@ -24,7 +24,7 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
} }
const modelSlots = appState.serverSetting.serverSetting.modelSlots const modelSlots = appState.serverSetting.serverSetting.slotInfos
let options: React.JSX.Element[] = [] let options: React.JSX.Element[] = []
if (modelSlots) { if (modelSlots) {
options = modelSlots.map((x, index) => { options = modelSlots.map((x, index) => {

View File

@ -18,14 +18,17 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
const modelTiles = useMemo(() => { const modelTiles = useMemo(() => {
if (!serverSetting.serverSetting.modelSlots) { if (!serverSetting.serverSetting.slotInfos) {
return [] return []
} }
return serverSetting.serverSetting.modelSlots.map((x, index) => { return serverSetting.serverSetting.slotInfos.map((x, index) => {
if (x.voiceChangerType == null) {
return null
}
if (x.modelFile.length == 0) { if (x.modelFile.length == 0) {
return null return null
} }
const tileContainerClass = index == serverSetting.serverSetting.modelSlotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container" const tileContainerClass = index == serverSetting.serverSetting.slotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container"
const name = x.name.length > 8 ? x.name.substring(0, 7) + "..." : x.name const name = x.name.length > 8 ? x.name.substring(0, 7) + "..." : x.name
const iconElem = x.iconFile.length > 0 ? const iconElem = x.iconFile.length > 0 ?
<img className="model-slot-tile-icon" src={x.iconFile} alt={x.name} /> : <img className="model-slot-tile-icon" src={x.iconFile} alt={x.name} /> :
@ -33,7 +36,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
const clickAction = async () => { const clickAction = async () => {
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + index const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + index
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex }) await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
setTimeout(() => { // quick hack setTimeout(() => { // quick hack
getInfo() getInfo()
}, 1000 * 2) }, 1000 * 2)
@ -50,7 +53,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
</div > </div >
) )
}).filter(x => x != null) }).filter(x => x != null)
}, [serverSetting.serverSetting.modelSlots, serverSetting.serverSetting.modelSlotIndex]) }, [serverSetting.serverSetting.slotInfos, serverSetting.serverSetting.slotIndex])
const modelSlotArea = useMemo(() => { const modelSlotArea = useMemo(() => {

View File

@ -21,11 +21,12 @@ export const CharacterArea = (_props: CharacterAreaProps) => {
}, []) }, [])
const selected = useMemo(() => { const selected = useMemo(() => {
if (serverSetting.serverSetting.modelSlotIndex == undefined) { console.log("serverSetting.serverSetting.slotInfos::", serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos)
if (serverSetting.serverSetting.slotIndex == undefined) {
return return
} }
return serverSetting.serverSetting.modelSlots[serverSetting.serverSetting.modelSlotIndex] return serverSetting.serverSetting.slotInfos[serverSetting.serverSetting.slotIndex]
}, [serverSetting.serverSetting.modelSlotIndex, serverSetting.serverSetting.modelSlots]) }, [serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos])
useEffect(() => { useEffect(() => {

View File

@ -27,7 +27,6 @@ export const useMessageBuilder = (): MessageBuilderStateAndMethod => {
lang = "en" lang = "en"
} }
console.log(messagesRef.current)
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message" return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
} }
return { return {

View File

@ -1,4 +1,4 @@
import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey } from "./const"; import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey, VoiceChangerType } from "./const";
type FileChunk = { type FileChunk = {
@ -166,16 +166,12 @@ export class ServerConfigurator {
}) })
} }
loadModel = async (slot: number, isHalf: boolean, params: string = "{}") => { loadModel = async (slot: number, voiceChangerType: VoiceChangerType, params: string = "{}") => {
if (isHalf == undefined || isHalf == null) {
console.warn("isHalf is invalid value", isHalf)
isHalf = false
}
const url = this.serverUrl + "/load_model" const url = this.serverUrl + "/load_model"
const info = new Promise<ServerInfo>(async (resolve) => { const info = new Promise<ServerInfo>(async (resolve) => {
const formData = new FormData(); const formData = new FormData();
formData.append("slot", "" + slot); formData.append("slot", "" + slot);
formData.append("isHalf", "" + isHalf); formData.append("voiceChangerType", voiceChangerType);
formData.append("params", params); formData.append("params", params);
const request = new Request(url, { const request = new Request(url, {

View File

@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
import workerjs from "raw-loader!../worklet/dist/index.js"; import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js"; import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream, validateUrl } from "./util"; import { createDummyMediaStream, validateUrl } from "./util";
import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, WorkletNodeSetting, WorkletSetting } from "./const"; import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, VoiceChangerType, WorkletNodeSetting, WorkletSetting } from "./const";
import { ServerConfigurator } from "./ServerConfigurator"; import { ServerConfigurator } from "./ServerConfigurator";
// オーディオデータの流れ // オーディオデータの流れ
@ -298,10 +298,10 @@ export class VoiceChangerClient {
} }
loadModel = ( loadModel = (
slot: number, slot: number,
isHalf: boolean, voiceChangerType: VoiceChangerType,
params: string, params: string,
) => { ) => {
return this.configurator.loadModel(slot, isHalf, params) return this.configurator.loadModel(slot, voiceChangerType, params)
} }
uploadAssets = (params: string) => { uploadAssets = (params: string) => {
return this.configurator.uploadAssets(params) return this.configurator.uploadAssets(params)

View File

@ -16,6 +16,16 @@ export const ClientType = {
} as const } as const
export type ClientType = typeof ClientType[keyof typeof ClientType] export type ClientType = typeof ClientType[keyof typeof ClientType]
export const VoiceChangerType = {
"MMVCv15": "MMVCv15",
"MMVCv13": "MMVCv13",
"so-vits-svc-40": "so-vits-svc-40",
"DDSP-SVC": "DDSP-SVC",
"RVC": "RVC"
} as const
export type VoiceChangerType = typeof VoiceChangerType[keyof typeof VoiceChangerType]
/////////////////////// ///////////////////////
// サーバセッティング // サーバセッティング
/////////////////////// ///////////////////////
@ -122,7 +132,8 @@ export const ServerSettingKey = {
"rvcQuality": "rvcQuality", "rvcQuality": "rvcQuality",
"modelSamplingRate": "modelSamplingRate", "modelSamplingRate": "modelSamplingRate",
"silenceFront": "silenceFront", "silenceFront": "silenceFront",
"modelSlotIndex": "modelSlotIndex", // "modelSlotIndex": "modelSlotIndex",
"slotIndex": "slotIndex",
"useEnhancer": "useEnhancer", "useEnhancer": "useEnhancer",
"useDiff": "useDiff", "useDiff": "useDiff",
@ -181,7 +192,8 @@ export type VoiceChangerServerSetting = {
rvcQuality: number // 0:low, 1:high rvcQuality: number // 0:low, 1:high
silenceFront: number // 0:off, 1:on silenceFront: number // 0:off, 1:on
modelSamplingRate: ModelSamplingRate // 32000,40000,48000 modelSamplingRate: ModelSamplingRate // 32000,40000,48000
modelSlotIndex: number, // modelSlotIndex: number,
slotIndex: number,
useEnhancer: number// DDSP-SVC useEnhancer: number// DDSP-SVC
useDiff: number// DDSP-SVC useDiff: number// DDSP-SVC
@ -197,7 +209,31 @@ export type VoiceChangerServerSetting = {
enableDirectML: number enableDirectML: number
} }
type ModelSlot = { // type ModelSlot = {
// modelFile: string
// featureFile: string,
// indexFile: string,
// defaultTune: number,
// defaultIndexRatio: number,
// defaultProtect: number,
// modelType: ModelType,
// embChannels: number,
// f0: boolean,
// samplingRate: number
// deprecated: boolean
// name: string,
// description: string,
// credit: string,
// termsOfUseUrl: string,
// iconFile: string
// }
type SlotInfo = {
voiceChangerType: VoiceChangerType | null
modelFile: string modelFile: string
featureFile: string, featureFile: string,
indexFile: string, indexFile: string,
@ -233,7 +269,8 @@ export type ServerInfo = VoiceChangerServerSetting & {
pyTorchModelFile: string, pyTorchModelFile: string,
onnxModelFile: string, onnxModelFile: string,
onnxExecutionProviders: OnnxExecutionProvider[] onnxExecutionProviders: OnnxExecutionProvider[]
modelSlots: ModelSlot[] // modelSlots: ModelSlot[]
slotInfos: SlotInfo[]
serverAudioInputDevices: ServerAudioDevice[] serverAudioInputDevices: ServerAudioDevice[]
serverAudioOutputDevices: ServerAudioDevice[] serverAudioOutputDevices: ServerAudioDevice[]
sampleModels: RVCSampleModel[] sampleModels: RVCSampleModel[]
@ -311,7 +348,8 @@ export const DefaultServerSetting: ServerInfo = {
rvcQuality: 0, rvcQuality: 0,
modelSamplingRate: 48000, modelSamplingRate: 48000,
silenceFront: 1, silenceFront: 1,
modelSlotIndex: 0, // modelSlotIndex: 0,
slotIndex: 0,
sampleModels: [], sampleModels: [],
gpus: [], gpus: [],
@ -331,7 +369,8 @@ export const DefaultServerSetting: ServerInfo = {
pyTorchModelFile: "", pyTorchModelFile: "",
onnxModelFile: "", onnxModelFile: "",
onnxExecutionProviders: [], onnxExecutionProviders: [],
modelSlots: [], // modelSlots: [],
slotInfos: [],
serverAudioInputDevices: [], serverAudioInputDevices: [],
serverAudioOutputDevices: [] serverAudioOutputDevices: []
} }

View File

@ -1,5 +1,5 @@
import { useState, useMemo, useEffect } from "react" import { useState, useMemo, useEffect } from "react"
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, Framework, MergeModelRequest } from "../const" import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, MergeModelRequest, VoiceChangerType } from "../const"
import { VoiceChangerClient } from "../VoiceChangerClient" import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB" import { useIndexedDB } from "./useIndexedDB"
import { ModelLoadException } from "../exceptions" import { ModelLoadException } from "../exceptions"
@ -18,12 +18,7 @@ export type ModelAssetName = typeof ModelAssetName[keyof typeof ModelAssetName]
export type FileUploadSetting = { export type FileUploadSetting = {
isHalf: boolean
uploaded: boolean uploaded: boolean
defaultTune: number
defaultIndexRatio: number
defaultProtect: number
framework: Framework
params: string params: string
mmvcv13Config: ModelData | null mmvcv13Config: ModelData | null
@ -52,12 +47,7 @@ export type FileUploadSetting = {
} }
export const InitialFileUploadSetting: FileUploadSetting = { export const InitialFileUploadSetting: FileUploadSetting = {
isHalf: true,
uploaded: false, uploaded: false,
defaultTune: 0,
defaultIndexRatio: 1,
defaultProtect: 0.5,
framework: Framework.PyTorch,
params: "{}", params: "{}",
mmvcv13Config: null, mmvcv13Config: null,
@ -400,43 +390,39 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
} }
// const configFileName = fileUploadSetting.configFile?.filename || "-" // const configFileName = fileUploadSetting.configFile?.filename || "-"
const files: { kind: string, name: string }[] = []
if (fileUploadSetting.mmvcv13Config?.filename) files.push({ kind: "mmvcv13Config", name: fileUploadSetting.mmvcv13Config.filename })
if (fileUploadSetting.mmvcv13Model?.filename) files.push({ kind: "mmvcv13Model", name: fileUploadSetting.mmvcv13Model.filename })
if (fileUploadSetting.mmvcv15Config?.filename) files.push({ kind: "mmvcv15Config", name: fileUploadSetting.mmvcv15Config.filename })
if (fileUploadSetting.mmvcv15Model?.filename) files.push({ kind: "mmvcv15Model", name: fileUploadSetting.mmvcv15Model.filename })
if (fileUploadSetting.soVitsSvc40Config?.filename) files.push({ kind: "soVitsSvc40Config", name: fileUploadSetting.soVitsSvc40Config.filename })
if (fileUploadSetting.soVitsSvc40Model?.filename) files.push({ kind: "soVitsSvc40Model", name: fileUploadSetting.soVitsSvc40Model.filename })
if (fileUploadSetting.soVitsSvc40Cluster?.filename) files.push({ kind: "soVitsSvc40Cluster", name: fileUploadSetting.soVitsSvc40Cluster.filename })
if (fileUploadSetting.rvcModel?.filename) files.push({ kind: "rvcModel", name: fileUploadSetting.rvcModel.filename })
if (fileUploadSetting.rvcIndex?.filename) files.push({ kind: "rvcIndex", name: fileUploadSetting.rvcIndex.filename })
if (fileUploadSetting.ddspSvcModel?.filename) files.push({ kind: "ddspSvcModel", name: fileUploadSetting.ddspSvcModel.filename })
if (fileUploadSetting.ddspSvcModelConfig?.filename) files.push({ kind: "ddspSvcModelConfig", name: fileUploadSetting.ddspSvcModelConfig.filename })
if (fileUploadSetting.ddspSvcDiffusion?.filename) files.push({ kind: "ddspSvcDiffusion", name: fileUploadSetting.ddspSvcDiffusion.filename })
if (fileUploadSetting.ddspSvcDiffusionConfig?.filename) files.push({ kind: "ddspSvcDiffusionConfig", name: fileUploadSetting.ddspSvcDiffusionConfig.filename })
const params = JSON.stringify({ const params = JSON.stringify({
defaultTune: fileUploadSetting.defaultTune || 0,
defaultIndexRatio: fileUploadSetting.defaultIndexRatio || 1,
defaultProtect: fileUploadSetting.defaultProtect || 0.5,
sampleId: fileUploadSetting.isSampleMode ? fileUploadSetting.sampleId || "" : "", sampleId: fileUploadSetting.isSampleMode ? fileUploadSetting.sampleId || "" : "",
rvcIndexDownload: fileUploadSetting.rvcIndexDownload || false, rvcIndexDownload: fileUploadSetting.rvcIndexDownload || false,
files: fileUploadSetting.isSampleMode ? {} : { files: fileUploadSetting.isSampleMode ? [] : files
mmvcv13Config: fileUploadSetting.mmvcv13Config?.filename || "",
mmvcv13Model: fileUploadSetting.mmvcv13Model?.filename || "",
mmvcv15Config: fileUploadSetting.mmvcv15Config?.filename || "",
mmvcv15Model: fileUploadSetting.mmvcv15Model?.filename || "",
soVitsSvc40Config: fileUploadSetting.soVitsSvc40Config?.filename || "",
soVitsSvc40Model: fileUploadSetting.soVitsSvc40Model?.filename || "",
soVitsSvc40Cluster: fileUploadSetting.soVitsSvc40Cluster?.filename || "",
soVitsSvc40v2Config: fileUploadSetting.soVitsSvc40v2Config?.filename || "",
soVitsSvc40v2Model: fileUploadSetting.soVitsSvc40v2Model?.filename || "",
soVitsSvc40v2Cluster: fileUploadSetting.soVitsSvc40v2Cluster?.filename || "",
rvcModel: fileUploadSetting.rvcModel?.filename || "",
rvcIndex: fileUploadSetting.rvcIndex?.filename || "",
rvcFeature: fileUploadSetting.rvcFeature?.filename || "",
ddspSvcModel: fileUploadSetting.ddspSvcModel?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModel?.filename : "",
ddspSvcModelConfig: fileUploadSetting.ddspSvcModelConfig?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModelConfig?.filename : "",
ddspSvcDiffusion: fileUploadSetting.ddspSvcDiffusion?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusion?.filename : "",
ddspSvcDiffusionConfig: fileUploadSetting.ddspSvcDiffusionConfig?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusionConfig.filename : "",
}
}) })
if (fileUploadSetting.isHalf == undefined) {
fileUploadSetting.isHalf = false
}
console.log("PARAMS:", params) console.log("PARAMS:", params)
const voiceChangerType = VoiceChangerType.RVC
const loadPromise = props.voiceChangerClient.loadModel( const loadPromise = props.voiceChangerClient.loadModel(
slot, slot,
fileUploadSetting.isHalf, voiceChangerType,
params, params,
) )
@ -460,12 +446,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
const storeToCache = (slot: number, fileUploadSetting: FileUploadSetting) => { const storeToCache = (slot: number, fileUploadSetting: FileUploadSetting) => {
try { try {
const saveData: FileUploadSetting = { const saveData: FileUploadSetting = {
isHalf: fileUploadSetting.isHalf, // キャッシュとしては不使用。guiで上書きされる。
uploaded: false, // キャッシュから読み込まれるときには、まだuploadされていないから。 uploaded: false, // キャッシュから読み込まれるときには、まだuploadされていないから。
defaultTune: fileUploadSetting.defaultTune,
defaultIndexRatio: fileUploadSetting.defaultIndexRatio,
defaultProtect: fileUploadSetting.defaultProtect,
framework: fileUploadSetting.framework,
params: fileUploadSetting.params, params: fileUploadSetting.params,
mmvcv13Config: fileUploadSetting.mmvcv13Config ? { data: fileUploadSetting.mmvcv13Config.data, filename: fileUploadSetting.mmvcv13Config.filename } : null, mmvcv13Config: fileUploadSetting.mmvcv13Config ? { data: fileUploadSetting.mmvcv13Config.data, filename: fileUploadSetting.mmvcv13Config.filename } : null,

View File

@ -8,9 +8,10 @@
"editor.defaultFormatter": null, // Prettier 使 "editor.defaultFormatter": null, // Prettier 使
"editor.formatOnSave": true // "editor.formatOnSave": true //
}, },
"python.formatting.blackArgs": ["--line-length", "550"],
"flake8.args": [ "flake8.args": [
"--ignore=E501,E402,E722,E741,E203,W503" "--ignore=E501,E402,E722,E741,E203,W503"
// "--max-line-length=150", // "--max-line-length=150"
// "--max-complexity=20" // "--max-complexity=20"
] ]
} }

View File

@ -3,9 +3,7 @@ class NoModeLoadedException(Exception):
self.framework = framework self.framework = framework
def __str__(self): def __str__(self):
return repr( return repr(f"No model for {self.framework} loaded. Please confirm the model uploaded.")
f"No model for {self.framework} loaded. Please confirm the model uploaded."
)
class HalfPrecisionChangingException(Exception): class HalfPrecisionChangingException(Exception):
@ -36,3 +34,17 @@ class DeviceCannotSupportHalfPrecisionException(Exception):
class VoiceChangerIsNotSelectedException(Exception): class VoiceChangerIsNotSelectedException(Exception):
def __str__(self): def __str__(self):
return repr("Voice Changer is not selected.") return repr("Voice Changer is not selected.")
class SlotConfigNotFoundException(Exception):
def __init__(self, modelDir, slotIndex):
self.modelDir = modelDir
self.slotIndex = slotIndex
def __str__(self):
return repr(f"Config for slot {self.slotIndex} is not found. (modelDir:{self.modelDir})")
class WeightDownladException(Exception):
def __str__(self):
return repr("Failed to download weight.")

View File

@ -1,4 +1,3 @@
from concurrent.futures import ThreadPoolExecutor
import sys import sys
from distutils.util import strtobool from distutils.util import strtobool
@ -7,24 +6,24 @@ import socket
import platform import platform
import os import os
import argparse import argparse
from Downloader import download, download_no_tqdm
from voice_changer.RVC.SampleDownloader import ( from Exceptions import WeightDownladException
checkRvcModelExist, from utils.downloader.SampleDownloader import downloadInitialSamples
downloadInitialSampleModels, from utils.downloader.WeightDownloader import downloadWeight
)
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
import uvicorn import uvicorn
from mods.ssl import create_self_signed_cert from mods.ssl import create_self_signed_cert
from voice_changer.VoiceChangerManager import VoiceChangerManager from voice_changer.VoiceChangerManager import VoiceChangerManager
from sio.MMVC_SocketIOApp import MMVC_SocketIOApp from sio.MMVC_SocketIOApp import MMVC_SocketIOApp
from restapi.MMVC_Rest import MMVC_Rest from restapi.MMVC_Rest import MMVC_Rest
from const import ( from const import (
NATIVE_CLIENT_FILE_MAC, NATIVE_CLIENT_FILE_MAC,
NATIVE_CLIENT_FILE_WIN, NATIVE_CLIENT_FILE_WIN,
SSL_KEY_DIR, SSL_KEY_DIR,
getRVCSampleJsonAndModelIds,
) )
import subprocess import subprocess
import multiprocessing as mp import multiprocessing as mp
@ -35,56 +34,23 @@ setup_loggers()
def setupArgParser(): def setupArgParser():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument("--logLevel", type=str, default="critical", help="Log level info|critical. (default: critical)")
"--logLevel",
type=str,
default="critical",
help="Log level info|critical. (default: critical)",
)
parser.add_argument("-p", type=int, default=18888, help="port") parser.add_argument("-p", type=int, default=18888, help="port")
parser.add_argument("--https", type=strtobool, default=False, help="use https") parser.add_argument("--https", type=strtobool, default=False, help="use https")
parser.add_argument( parser.add_argument("--httpsKey", type=str, default="ssl.key", help="path for the key of https")
"--httpsKey", type=str, default="ssl.key", help="path for the key of https" parser.add_argument("--httpsCert", type=str, default="ssl.cert", help="path for the cert of https")
) parser.add_argument("--httpsSelfSigned", type=strtobool, default=True, help="generate self-signed certificate")
parser.add_argument(
"--httpsCert", type=str, default="ssl.cert", help="path for the cert of https"
)
parser.add_argument(
"--httpsSelfSigned",
type=strtobool,
default=True,
help="generate self-signed certificate",
)
parser.add_argument("--model_dir", type=str, help="path to model files") parser.add_argument("--model_dir", type=str, help="path to model files")
parser.add_argument( parser.add_argument("--sample_mode", type=str, default="production", help="sample_mode")
"--rvc_sample_mode", type=str, default="production", help="rvc_sample_mode"
)
parser.add_argument( parser.add_argument("--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)")
"--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)" parser.add_argument("--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)")
) parser.add_argument("--content_vec_500_onnx_on", type=strtobool, default=False, help="use or not onnx for content_vec_500")
parser.add_argument( parser.add_argument("--hubert_base", type=str, help="path to hubert_base model(pytorch)")
"--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)" parser.add_argument("--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)")
) parser.add_argument("--hubert_soft", type=str, help="path to hubert_soft model(pytorch)")
parser.add_argument( parser.add_argument("--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)")
"--content_vec_500_onnx_on",
type=strtobool,
default=False,
help="use or not onnx for content_vec_500",
)
parser.add_argument(
"--hubert_base", type=str, help="path to hubert_base model(pytorch)"
)
parser.add_argument(
"--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)"
)
parser.add_argument(
"--hubert_soft", type=str, help="path to hubert_soft model(pytorch)"
)
parser.add_argument(
"--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)"
)
return parser return parser
@ -111,76 +77,19 @@ def printMessage(message, level=0):
print(f"\033[47m {message}\033[0m") print(f"\033[47m {message}\033[0m")
def downloadWeight():
# content_vec_500 = (args.content_vec_500,)
# content_vec_500_onnx = (args.content_vec_500_onnx,)
# content_vec_500_onnx_on = (args.content_vec_500_onnx_on,)
hubert_base = args.hubert_base
hubert_base_jp = args.hubert_base_jp
hubert_soft = args.hubert_soft
nsf_hifigan = args.nsf_hifigan
# file exists check (currently only for rvc)
downloadParams = []
if os.path.exists(hubert_base) is False:
downloadParams.append(
{
"url": "https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt",
"saveTo": hubert_base,
"position": 0,
}
)
if os.path.exists(hubert_base_jp) is False:
downloadParams.append(
{
"url": "https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt",
"saveTo": hubert_base_jp,
"position": 1,
}
)
if os.path.exists(hubert_soft) is False:
downloadParams.append(
{
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt",
"saveTo": hubert_soft,
"position": 2,
}
)
if os.path.exists(nsf_hifigan) is False:
downloadParams.append(
{
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin",
"saveTo": nsf_hifigan,
"position": 3,
}
)
nsf_hifigan_config = os.path.join(os.path.dirname(nsf_hifigan), "config.json")
if os.path.exists(nsf_hifigan_config) is False:
downloadParams.append(
{
"url": "https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json",
"saveTo": nsf_hifigan_config,
"position": 4,
}
)
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)
if (
os.path.exists(hubert_base) is False
or os.path.exists(hubert_base_jp) is False
or os.path.exists(hubert_soft) is False
or os.path.exists(nsf_hifigan) is False
or os.path.exists(nsf_hifigan_config) is False
):
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
printMessage("failed to download weight for rvc", level=2)
parser = setupArgParser() parser = setupArgParser()
args, unknown = parser.parse_known_args() args, unknown = parser.parse_known_args()
voiceChangerParams = VoiceChangerParams(
model_dir=args.model_dir,
content_vec_500=args.content_vec_500,
content_vec_500_onnx=args.content_vec_500_onnx,
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
hubert_base=args.hubert_base,
hubert_base_jp=args.hubert_base_jp,
hubert_soft=args.hubert_soft,
nsf_hifigan=args.nsf_hifigan,
sample_mode=args.sample_mode,
)
printMessage(f"Booting PHASE :{__name__}", level=2) printMessage(f"Booting PHASE :{__name__}", level=2)
@ -199,24 +108,6 @@ def localServer(logLevel: str = "critical"):
if __name__ == "MMVCServerSIO": if __name__ == "MMVCServerSIO":
mp.freeze_support() mp.freeze_support()
voiceChangerParams = VoiceChangerParams(
model_dir=args.model_dir,
content_vec_500=args.content_vec_500,
content_vec_500_onnx=args.content_vec_500_onnx,
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
hubert_base=args.hubert_base,
hubert_base_jp=args.hubert_base_jp,
hubert_soft=args.hubert_soft,
nsf_hifigan=args.nsf_hifigan,
rvc_sample_mode=args.rvc_sample_mode,
)
if (
os.path.exists(voiceChangerParams.hubert_base) is False
or os.path.exists(voiceChangerParams.hubert_base_jp) is False
):
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
printMessage("failed to download weight for rvc", level=2)
voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams) voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams) app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams)
@ -230,20 +121,16 @@ if __name__ == "__main__":
mp.freeze_support() mp.freeze_support()
printMessage("Voice Changerを起動しています。", level=2) printMessage("Voice Changerを起動しています。", level=2)
# ダウンロード(Weight)
# ダウンロード
downloadWeight()
os.makedirs(args.model_dir, exist_ok=True)
try: try:
sampleJsons = [] downloadWeight(voiceChangerParams)
sampleJsonUrls, sampleModels = getRVCSampleJsonAndModelIds(args.rvc_sample_mode) except WeightDownladException:
for url in sampleJsonUrls: printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
filename = os.path.basename(url) printMessage("failed to download weight for rvc", level=2)
download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
sampleJsons.append(filename) # ダウンロード(Sample)
if checkRvcModelExist(args.model_dir) is False: try:
downloadInitialSampleModels(sampleJsons, sampleModels, args.model_dir) downloadInitialSamples(args.sample_mode, args.model_dir)
except Exception as e: except Exception as e:
print("[Voice Changer] loading sample failed", e) print("[Voice Changer] loading sample failed", e)
@ -280,9 +167,7 @@ if __name__ == "__main__":
) )
key_path = os.path.join(SSL_KEY_DIR, keyname) key_path = os.path.join(SSL_KEY_DIR, keyname)
cert_path = os.path.join(SSL_KEY_DIR, certname) cert_path = os.path.join(SSL_KEY_DIR, certname)
printMessage( printMessage(f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1
)
elif args.https and args.httpsSelfSigned == 0: elif args.https and args.httpsSelfSigned == 0:
# HTTPS # HTTPS
@ -336,16 +221,12 @@ if __name__ == "__main__":
p.start() p.start()
try: try:
if sys.platform.startswith("win"): if sys.platform.startswith("win"):
process = subprocess.Popen( process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"])
[NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"]
)
return_code = process.wait() return_code = process.wait()
print("client closed.") print("client closed.")
p.terminate() p.terminate()
elif sys.platform.startswith("darwin"): elif sys.platform.startswith("darwin"):
process = subprocess.Popen( process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"])
[NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"]
)
return_code = process.wait() return_code = process.wait()
print("client closed.") print("client closed.")
p.terminate() p.terminate()

View File

@ -1,44 +0,0 @@
from dataclasses import dataclass, field
import json
from const import ModelType
@dataclass
class RVCModelSample:
id: str = ""
lang: str = ""
tag: list[str] = field(default_factory=lambda: [])
name: str = ""
modelUrl: str = ""
indexUrl: str = ""
termsOfUseUrl: str = ""
icon: str = ""
credit: str = ""
description: str = ""
sampleRate: int = 48000
modelType: str = ""
f0: bool = True
def getModelSamples(jsonFiles: list[str], modelType: ModelType):
try:
samples: list[RVCModelSample] = []
for file in jsonFiles:
with open(file, "r", encoding="utf-8") as f:
jsonDict = json.load(f)
modelList = jsonDict[modelType]
if modelType == "RVC":
for s in modelList:
modelSample = RVCModelSample(**s)
samples.append(modelSample)
else:
raise RuntimeError(f"Unknown model type {modelType}")
return samples
except Exception as e:
print("[Voice Changer] loading sample info error:", e)
return None

View File

@ -5,33 +5,21 @@ import tempfile
from typing import Literal, TypeAlias from typing import Literal, TypeAlias
ModelType: TypeAlias = Literal[ VoiceChangerType: TypeAlias = Literal[
"MMVCv15",
"MMVCv13", "MMVCv13",
"so-vits-svc-40v2", "MMVCv15",
"so-vits-svc-40", "so-vits-svc-40",
"so-vits-svc-40_c",
"DDSP-SVC", "DDSP-SVC",
"RVC", "RVC",
] ]
ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION" ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION"
tmpdir = tempfile.TemporaryDirectory() tmpdir = tempfile.TemporaryDirectory()
# print("generate tmpdir:::",tmpdir)
SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys" SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs" MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
UPLOAD_DIR = ( UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
os.path.join(tmpdir.name, "upload_dir") NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client" # type: ignore
if hasattr(sys, "_MEIPASS")
else "upload_dir"
)
NATIVE_CLIENT_FILE_WIN = (
os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") # type: ignore
if hasattr(sys, "_MEIPASS")
else "voice-changer-native-client"
)
NATIVE_CLIENT_FILE_MAC = ( NATIVE_CLIENT_FILE_MAC = (
os.path.join( os.path.join(
sys._MEIPASS, # type: ignore sys._MEIPASS, # type: ignore
@ -44,25 +32,12 @@ NATIVE_CLIENT_FILE_MAC = (
else "voice-changer-native-client" else "voice-changer-native-client"
) )
HUBERT_ONNX_MODEL_PATH = ( TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") # type: ignore
if hasattr(sys, "_MEIPASS")
else "model_hubert/hubert_simple.onnx"
)
TMP_DIR = (
os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
)
os.makedirs(TMP_DIR, exist_ok=True) os.makedirs(TMP_DIR, exist_ok=True)
def getFrontendPath(): def getFrontendPath():
frontend_path = ( frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
os.path.join(sys._MEIPASS, "dist")
if hasattr(sys, "_MEIPASS")
else "../client/demo/dist"
)
return frontend_path return frontend_path
@ -100,84 +75,84 @@ class ServerAudioDeviceTypes(Enum):
audiooutput = "audiooutput" audiooutput = "audiooutput"
class RVCSampleMode(Enum): RVCSampleMode: TypeAlias = Literal[
production = "production" "production",
testOfficial = "testOfficial" "testOfficial",
testDDPNTorch = "testDDPNTorch" "testDDPNTorch",
testDDPNONNX = "testDDPNONNX" "testDDPNONNX",
testONNXFull = "testONNXFull" "testONNXFull",
]
def getRVCSampleJsonAndModelIds(mode: RVCSampleMode): def getSampleJsonAndModelIds(mode: RVCSampleMode):
if mode == RVCSampleMode.production.value: if mode == "production":
return [ return [
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json", # "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json",
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json", # "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t.json", "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o.json", "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
], [ ], [
("TokinaShigure_o", True), ("TokinaShigure_o", {"useIndex": True}),
("KikotoMahiro_o", False), ("KikotoMahiro_o", {"useIndex": False}),
("Amitaro_o", False), ("Amitaro_o", {"useIndex": False}),
("Tsukuyomi-chan_o", False), ("Tsukuyomi-chan_o", {"useIndex": False}),
] ]
elif mode == RVCSampleMode.testOfficial.value: elif mode == "testOfficial":
return [ return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [ ], [
("test-official-v1-f0-48k-l9-hubert_t", True), ("test-official-v1-f0-48k-l9-hubert_t", {"useIndex": True}),
("test-official-v1-nof0-48k-l9-hubert_t", False), ("test-official-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
("test-official-v2-f0-40k-l12-hubert_t", False), ("test-official-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
("test-official-v2-nof0-40k-l12-hubert_t", False), ("test-official-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
("test-official-v1-f0-48k-l9-hubert_o", True), ("test-official-v1-f0-48k-l9-hubert_o", {"useIndex": True}),
("test-official-v1-nof0-48k-l9-hubert_o", False), ("test-official-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
("test-official-v2-f0-40k-l12-hubert_o", False), ("test-official-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
("test-official-v2-nof0-40k-l12-hubert_o", False), ("test-official-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
] ]
elif mode == RVCSampleMode.testDDPNTorch.value: elif mode == "testDDPNTorch":
return [ return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [ ], [
("test-ddpn-v1-f0-48k-l9-hubert_t", False), ("test-ddpn-v1-f0-48k-l9-hubert_t", {"useIndex": False}),
("test-ddpn-v1-nof0-48k-l9-hubert_t", False), ("test-ddpn-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_t", False), ("test-ddpn-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_t", False), ("test-ddpn-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_t", False), ("test-ddpn-v2-f0-40k-l12-hubert_jp_t", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", False), ("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", {"useIndex": False}),
] ]
elif mode == RVCSampleMode.testDDPNONNX.value: elif mode == "testDDPNONNX":
return [ return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [ ], [
("test-ddpn-v1-f0-48k-l9-hubert_o", False), ("test-ddpn-v1-f0-48k-l9-hubert_o", {"useIndex": False}),
("test-ddpn-v1-nof0-48k-l9-hubert_o", False), ("test-ddpn-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_o", False), ("test-ddpn-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_o", False), ("test-ddpn-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o", False), ("test-ddpn-v2-f0-40k-l12-hubert_jp_o", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", False), ("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", {"useIndex": False}),
] ]
elif mode == RVCSampleMode.testONNXFull.value: elif mode == "testONNXFull":
return [ return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json", "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [ ], [
("test-official-v1-f0-48k-l9-hubert_o_full", False), ("test-official-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-official-v1-nof0-48k-l9-hubert_o_full", False), ("test-official-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-official-v2-f0-40k-l12-hubert_o_full", False), ("test-official-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-official-v2-nof0-40k-l12-hubert_o_full", False), ("test-official-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v1-f0-48k-l9-hubert_o_full", False), ("test-ddpn-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-ddpn-v1-nof0-48k-l9-hubert_o_full", False), ("test-ddpn-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_o_full", False), ("test-ddpn-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", False), ("test-ddpn-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", False), ("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", False), ("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
] ]
else: else:
return [], [] return [], []
RVC_MODEL_DIRNAME = "rvc" MAX_SLOT_NUM = 10
RVC_MAX_SLOT_NUM = 10

View File

@ -0,0 +1,37 @@
from dataclasses import dataclass, field
from typing import TypeAlias, Union, Any
from const import VoiceChangerType
@dataclass
class ModelSample:
id: str = ""
voiceChangerType: VoiceChangerType | None = None
@dataclass
class RVCModelSample(ModelSample):
voiceChangerType: VoiceChangerType = "RVC"
lang: str = ""
tag: list[str] = field(default_factory=lambda: [])
name: str = ""
modelUrl: str = ""
indexUrl: str = ""
termsOfUseUrl: str = ""
icon: str = ""
credit: str = ""
description: str = ""
sampleRate: int = 48000
modelType: str = ""
f0: bool = True
ModelSamples: TypeAlias = Union[ModelSample, RVCModelSample]
def generateModelSample(params: Any) -> ModelSamples:
if params["voiceChangerType"] == "RVC":
return RVCModelSample(**params)
else:
return ModelSample(**{k: v for k, v in params.items() if k in ModelSample.__annotations__})

59
server/data/ModelSlot.py Normal file
View File

@ -0,0 +1,59 @@
from typing import TypeAlias, Union
from const import EnumInferenceTypes, EnumEmbedderTypes, VoiceChangerType
from dataclasses import dataclass, asdict
import os
import json
@dataclass
class ModelSlot:
voiceChangerType: VoiceChangerType | None = None
@dataclass
class RVCModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "RVC"
modelFile: str = ""
indexFile: str = ""
defaultTune: int = 0
defaultIndexRatio: int = 1
defaultProtect: float = 0.5
isONNX: bool = False
modelType: str = EnumInferenceTypes.pyTorchRVC.value
samplingRate: int = -1
f0: bool = True
embChannels: int = 256
embOutputLayer: int = 9
useFinalProj: bool = True
deprecated: bool = False
embedder: str = EnumEmbedderTypes.hubert.value
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
slotDir = os.path.join(model_dir, str(slotIndex))
jsonFile = os.path.join(slotDir, "params.json")
if not os.path.exists(jsonFile):
return ModelSlot()
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
if slotInfo.voiceChangerType == "RVC":
return RVCModelSlot(**jsonDict)
else:
return ModelSlot()
def saveSlotInfo(model_dir: str, slotIndex: int, slotInfo: ModelSlots):
slotDir = os.path.join(model_dir, str(slotIndex))
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))

View File

@ -69,19 +69,14 @@ class MMVC_Rest:
StaticFiles(directory=f"{getFrontendPath()}", html=True), StaticFiles(directory=f"{getFrontendPath()}", html=True),
name="static", name="static",
) )
app_fastapi.mount( app_fastapi.mount("/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static")
"/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static" app_fastapi.mount("/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static")
)
app_fastapi.mount(
"/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static"
)
if sys.platform.startswith("darwin"): if sys.platform.startswith("darwin"):
p1 = os.path.dirname(sys._MEIPASS) p1 = os.path.dirname(sys._MEIPASS)
p2 = os.path.dirname(p1) p2 = os.path.dirname(p1)
p3 = os.path.dirname(p2) p3 = os.path.dirname(p2)
model_dir = os.path.join(p3, voiceChangerParams.model_dir) model_dir = os.path.join(p3, voiceChangerParams.model_dir)
print("mac model_dir:", model_dir)
app_fastapi.mount( app_fastapi.mount(
f"/{voiceChangerParams.model_dir}", f"/{voiceChangerParams.model_dir}",
StaticFiles(directory=model_dir), StaticFiles(directory=model_dir),

View File

@ -1,6 +1,4 @@
import json
import os import os
import shutil
from typing import Union from typing import Union
from fastapi import APIRouter from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder from fastapi.encoders import jsonable_encoder
@ -10,8 +8,7 @@ from fastapi import UploadFile, File, Form
from restapi.mods.FileUploader import upload_file, concat_file_chunks from restapi.mods.FileUploader import upload_file, concat_file_chunks
from voice_changer.VoiceChangerManager import VoiceChangerManager from voice_changer.VoiceChangerManager import VoiceChangerManager
from const import MODEL_DIR, UPLOAD_DIR, ModelType from const import MODEL_DIR, UPLOAD_DIR, VoiceChangerType
from voice_changer.utils.LoadModelParams import LoadModelParams
os.makedirs(UPLOAD_DIR, exist_ok=True) os.makedirs(UPLOAD_DIR, exist_ok=True)
@ -24,123 +21,130 @@ class MMVC_Rest_Fileuploader:
self.router = APIRouter() self.router = APIRouter()
self.router.add_api_route("/info", self.get_info, methods=["GET"]) self.router.add_api_route("/info", self.get_info, methods=["GET"])
self.router.add_api_route("/performance", self.get_performance, methods=["GET"]) self.router.add_api_route("/performance", self.get_performance, methods=["GET"])
self.router.add_api_route( self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
"/upload_file", self.post_upload_file, methods=["POST"] self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
) self.router.add_api_route("/update_settings", self.post_update_settings, methods=["POST"])
self.router.add_api_route(
"/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"]
)
self.router.add_api_route(
"/update_settings", self.post_update_settings, methods=["POST"]
)
self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"]) self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"]) self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"]) # self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"]) self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
self.router.add_api_route( self.router.add_api_route("/merge_model", self.post_merge_models, methods=["POST"])
"/merge_model", self.post_merge_models, methods=["POST"] self.router.add_api_route("/update_model_default", self.post_update_model_default, methods=["POST"])
) self.router.add_api_route("/update_model_info", self.post_update_model_info, methods=["POST"])
self.router.add_api_route( self.router.add_api_route("/upload_model_assets", self.post_upload_model_assets, methods=["POST"])
"/update_model_default", self.post_update_model_default, methods=["POST"]
)
self.router.add_api_route(
"/update_model_info", self.post_update_model_info, methods=["POST"]
)
self.router.add_api_route(
"/upload_model_assets", self.post_upload_model_assets, methods=["POST"]
)
def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)): def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
res = upload_file(UPLOAD_DIR, file, filename) try:
json_compatible_item_data = jsonable_encoder(res) res = upload_file(UPLOAD_DIR, file, filename)
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(res)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def post_concat_uploaded_file( def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
self, filename: str = Form(...), filenameChunkNum: int = Form(...) try:
): res = concat_file_chunks(UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
res = concat_file_chunks(UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR) json_compatible_item_data = jsonable_encoder(res)
json_compatible_item_data = jsonable_encoder(res) return JSONResponse(content=json_compatible_item_data)
return JSONResponse(content=json_compatible_item_data) except Exception as e:
print("[Voice Changer] ", e)
def get_info(self): def get_info(self):
info = self.voiceChangerManager.get_info() try:
json_compatible_item_data = jsonable_encoder(info) info = self.voiceChangerManager.get_info()
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def get_performance(self): def get_performance(self):
info = self.voiceChangerManager.get_performance() try:
json_compatible_item_data = jsonable_encoder(info) info = self.voiceChangerManager.get_performance()
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def post_update_settings( def post_update_settings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
self, key: str = Form(...), val: Union[int, str, float] = Form(...) try:
): print("[Voice Changer] update configuration:", key, val)
print("[Voice Changer] update configuration:", key, val) info = self.voiceChangerManager.update_settings(key, val)
info = self.voiceChangerManager.update_settings(key, val) json_compatible_item_data = jsonable_encoder(info)
json_compatible_item_data = jsonable_encoder(info) return JSONResponse(content=json_compatible_item_data)
return JSONResponse(content=json_compatible_item_data) except Exception as e:
print("[Voice Changer] ", e)
def post_load_model( def post_load_model(
self, self,
slot: int = Form(...), slot: int = Form(...),
isHalf: bool = Form(...), voiceChangerType: str = Form(...),
params: str = Form(...), params: str = Form(...),
): ):
paramDict = json.loads(params) try:
# print("paramDict", paramDict) info = self.voiceChangerManager.loadModel(slot, voiceChangerType, params)
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
# Change Filepath def post_model_type(self, modelType: VoiceChangerType = Form(...)):
newFilesDict = {} try:
for key, val in paramDict["files"].items(): # info = self.voiceChangerManager.switchModelType(modelType)
if val != "-" and val != "": # json_compatible_item_data = jsonable_encoder(info)
uploadPath = os.path.join(UPLOAD_DIR, val) json_compatible_item_data = jsonable_encoder({"status": "ok"})
storePath = os.path.join(UPLOAD_DIR, f"{slot}", val) return JSONResponse(content=json_compatible_item_data)
storeDir = os.path.dirname(storePath) except Exception as e:
os.makedirs(storeDir, exist_ok=True) print("[Voice Changer] ", e)
shutil.move(uploadPath, storePath)
newFilesDict[key] = storePath
paramDict["files"] = newFilesDict
props: LoadModelParams = LoadModelParams( # def get_model_type(self):
slot=slot, isHalf=isHalf, params=paramDict # try:
) # # info = self.voiceChangerManager.getModelType()
# # json_compatible_item_data = jsonable_encoder(info)
info = self.voiceChangerManager.loadModel(props) # print(
json_compatible_item_data = jsonable_encoder(info) # "-------------- get_model_type",
return JSONResponse(content=json_compatible_item_data) # )
# json_compatible_item_data = jsonable_encoder({"status": "ok"})
def post_model_type(self, modelType: ModelType = Form(...)): # return JSONResponse(content=json_compatible_item_data)
info = self.voiceChangerManager.switchModelType(modelType) # except Exception as e:
json_compatible_item_data = jsonable_encoder(info) # print("[Voice Changer] ", e)
return JSONResponse(content=json_compatible_item_data)
def get_model_type(self):
info = self.voiceChangerManager.getModelType()
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
def get_onnx(self): def get_onnx(self):
info = self.voiceChangerManager.export2onnx() try:
json_compatible_item_data = jsonable_encoder(info) info = self.voiceChangerManager.export2onnx()
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def post_merge_models(self, request: str = Form(...)): def post_merge_models(self, request: str = Form(...)):
print(request) try:
info = self.voiceChangerManager.merge_models(request) print(request)
json_compatible_item_data = jsonable_encoder(info) info = self.voiceChangerManager.merge_models(request)
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def post_update_model_default(self): def post_update_model_default(self):
info = self.voiceChangerManager.update_model_default() try:
json_compatible_item_data = jsonable_encoder(info) info = self.voiceChangerManager.update_model_default()
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def post_update_model_info(self, newData: str = Form(...)): def post_update_model_info(self, newData: str = Form(...)):
info = self.voiceChangerManager.update_model_info(newData) try:
json_compatible_item_data = jsonable_encoder(info) info = self.voiceChangerManager.update_model_info(newData)
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def post_upload_model_assets(self, params: str = Form(...)): def post_upload_model_assets(self, params: str = Form(...)):
info = self.voiceChangerManager.upload_model_assets(params) try:
json_compatible_item_data = jsonable_encoder(info) info = self.voiceChangerManager.upload_model_assets(params)
return JSONResponse(content=json_compatible_item_data) json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)

View File

@ -1,14 +0,0 @@
from fastapi.responses import FileResponse
import os
def mod_get_model(modelFile: str):
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
return FileResponse(path=modelPath)
def mod_delete_model(modelFile: str):
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
os.remove(modelPath)
return {"Model deleted": f"{modelFile}"}

View File

@ -1,23 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_models():
gModels = get_file_list(f'MMVC_Trainer/logs/G*.pth')
dModels = get_file_list(f'MMVC_Trainer/logs/D*.pth')
configs = get_file_list(f'MMVC_Trainer/logs/config.json')
models = []
models.extend(gModels)
models.extend(dModels)
models.extend(configs)
models = [ os.path.basename(x) for x in models]
models = sorted(models)
data = {
"models":models
}
json_compatible_item_data = jsonable_encoder(data)
return JSONResponse(content=json_compatible_item_data)

View File

@ -1,26 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os
MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt"
def mod_get_multi_speaker_setting():
data = {}
if os.path.isfile(MULTI_SPEAKER_SETTING_PATH) == False:
with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
f.write("")
f.flush()
f.close()
with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
setting = f.read()
data["multi_speaker_setting"] = setting
json_compatible_item_data = jsonable_encoder(data)
return JSONResponse(content=json_compatible_item_data)
def mod_post_multi_speaker_setting(setting: str):
    """Overwrite the multi-speaker correspondence file with the given text."""
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.write(setting)
        f.flush()
    return {"Write Multispeaker setting": f"{setting}"}

View File

@ -1,15 +0,0 @@
import shutil
from restapi.mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH
def mod_delete_speaker(speaker: str):
    """Remove a speaker's dataset directory and drop its correspondence entries."""
    shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}")

    with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
        lines = f.readlines()
    prefix = f"{speaker}|"
    kept = [line for line in lines if not line.startswith(prefix)]
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.writelines(kept)
        f.flush()
    return {"Speaker deleted": f"{speaker}"}

View File

@ -1,28 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os, base64
def mod_get_speaker_voice(speaker: str, voice: str):
    """Collect one recorded voice sample for a speaker.

    The returned JSON may contain:
      wav           -- base64-encoded wav bytes
      text          -- phoneme transcription
      readable_text -- human-readable transcription
    Keys are simply omitted for files that do not exist on disk.
    """
    data = {}

    wav_path = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav'
    if os.path.exists(wav_path):
        with open(wav_path, "rb") as f:
            data["wav"] = base64.b64encode(f.read()).decode('utf-8')

    text_path = f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt'
    if os.path.exists(text_path):
        with open(text_path, "r") as f:
            data["text"] = f.read()

    readable_path = f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt'
    if os.path.exists(readable_path):
        with open(readable_path, "r") as f:
            data["readable_text"] = f.read()

    return JSONResponse(content=jsonable_encoder(data))

View File

@ -1,22 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_speaker_voices(speaker: str):
    """List the distinct voice-sample ids recorded for a speaker."""
    root = f'MMVC_Trainer/dataset/textful/{speaker}'
    found = []
    found.extend(get_file_list(f'{root}/wav/*.wav'))
    found.extend(get_file_list(f'{root}/text/*.txt'))
    found.extend(get_file_list(f'{root}/readable_text/*.txt'))
    # wav/text/readable_text share the stem; dedupe and sort the ids.
    ids = sorted({os.path.splitext(os.path.basename(path))[0] for path in found})
    return JSONResponse(content=jsonable_encoder({"voices": ids}))

View File

@ -1,15 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_dir_list
import os
# CreateはFileUploaderで実装。
def mod_get_speakers():
    """List speaker directories under the textful dataset (created if missing)."""
    # Speaker creation itself happens via the file uploader.
    os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True)
    names = get_dir_list("MMVC_Trainer/dataset/textful/")
    payload = {"speakers": sorted(names)}
    return JSONResponse(content=jsonable_encoder(payload))

View File

@ -1,176 +0,0 @@
import subprocess,os
from restapi.utils.files import get_file_list
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
LOG_DIR = "info"    # directory where per-command log files are written
train_proc = None   # handle of the (single) background training process
SUCCESS = 0
ERROR = -1


### Submodule for Pre train
def sync_exec(cmd: str, log_path: str, cwd=None):
    """Run a shell command synchronously, sending stdout/stderr to log_path.

    Returns (SUCCESS, "success") on a zero exit code, otherwise
    (ERROR, reason).  `cwd` is forwarded directly to subprocess.run;
    None keeps the current working directory (subprocess.run's own
    default), which removes the original duplicated call branches.
    """
    shortCmdStr = cmd[:20]
    try:
        with open(log_path, 'w') as log_file:
            proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd=cwd)
        print(f"{shortCmdStr} returncode:{proc.returncode}")
        if proc.returncode != 0:
            print(f"{shortCmdStr} exception:")
            return (ERROR, f"returncode:{proc.returncode}")
    except Exception as e:
        print(f"{shortCmdStr} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, "success")
def sync_exec_with_stdout(cmd: str, log_path: str):
    """Run a command inside MMVC_Trainer, capturing stdout.

    stderr is appended to log_path.  Returns (SUCCESS, stdout_text) or
    (ERROR, reason) when the spawn itself fails.
    """
    shortCmdStr = cmd[:20]
    try:
        with open(log_path, 'w') as log_file:
            proc = subprocess.run(
                cmd,
                shell=True,
                text=True,
                stdout=subprocess.PIPE,
                stderr=log_file,
                cwd="MMVC_Trainer",
            )
            print(f"STDOUT{shortCmdStr}", proc.stdout)
    except Exception as e:
        print(f"{shortCmdStr} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, proc.stdout)
def create_dataset():
    """Kick off JTalk dataset preprocessing inside MMVC_Trainer."""
    command = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt"
    log_path = os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt")
    return sync_exec(command, log_path, "MMVC_Trainer")
def set_batch_size(batch: int):
    """Patch "batch_size" in baseconfig.json in place via sed."""
    command = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json"
    log_path = os.path.join(LOG_DIR, "log_set_batch_size.txt")
    return sync_exec(command, log_path)
def set_dummy_device_count():
    """Hard-wire torch.cuda.device_count() to 1 in train_ms.py via sed."""
    command = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py'
    log_path = os.path.join(LOG_DIR, "log_set_dummy_device_count.txt")
    return sync_exec(command, log_path)
### Submodule for Train
def exec_training(enable_finetuning: bool, GModel: str, DModel: str):
    """Start train_ms.py as a background process (at most one at a time).

    Returns (SUCCESS, "success") once the process is spawned, or
    (ERROR, reason) when a run is already active or spawning fails.
    NOTE(review): mod_post_start_training annotates enable_finetuning as
    str, while this compares it with `== True`; a string value such as
    "true" would never enable fine-tuning -- confirm the wire format.
    """
    global train_proc
    log_file = os.path.join(LOG_DIR, "training.txt")

    # Check whether training is already running (avoid a double launch).
    if train_proc != None:
        status = train_proc.poll()
        if status != None:
            print("Training have ended.", status)
            train_proc = None  # previous run finished; clear the stale handle
        else:
            print("Training have stated.")
            return (ERROR, "Training have started")

    try:
        # NOTE: `log_file` is rebound from the path string to the file object.
        with open(log_file, 'w') as log_file:
            if enable_finetuning == True:
                GModelPath = os.path.join("logs", GModel)  # cwd is MMVC_Trainer at launch, so "logs" is sufficient
                DModelPath = os.path.join("logs", DModel)
                cmd = f'python3 train_ms.py -c configs/train_config.json -m ./ -fg {GModelPath} -fd {DModelPath}'
            else:
                cmd = 'python3 train_ms.py -c configs/train_config.json -m ./'
            print("exec:", cmd)
            # "exec " makes the shell replace itself with python, so a later
            # kill() hits the training process, not an intermediate shell.
            train_proc = subprocess.Popen("exec " + cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer")
            print("Training stated")
            print(f"returncode:{train_proc.returncode}")
    except Exception as e:
        print("start training exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, "success")
def stop_training():
    """Stop the background training process started by exec_training.

    Returns (SUCCESS, "success") after killing a live process, otherwise
    (ERROR, reason) when nothing is running or it already exited.
    """
    global train_proc
    if train_proc is None:
        print("Training have not stated.")
        return (ERROR, "Training have not stated.")

    status = train_proc.poll()
    if status is not None:
        print("Training have already ended.", status)
        train_proc = None
        # BUG FIX: `status` is the int returncode; the original code did
        # `"..." + status`, which raises TypeError instead of returning.
        return (ERROR, f"Training have already ended. {status}")
    else:
        train_proc.kill()
        print("Training have stoped.")
        return (SUCCESS, "success")
### Main
def mod_post_pre_training(batch: int):
    """Run all preprocessing steps, stopping at the first failing step."""
    code, detail = set_batch_size(batch)
    if code == ERROR:
        return {"result": "failed", "detail": f"Preprocess(set_batch_size) failed. {detail}"}
    code, detail = set_dummy_device_count()
    if code == ERROR:
        return {"result": "failed", "detail": f"Preprocess(set_dummy_device_count) failed. {detail}"}
    code, detail = create_dataset()
    if code == ERROR:
        return {"result": "failed", "detail": f"Preprocess failed(create_dataset). {detail}"}
    return {"result": "success", "detail": f"Preprocess succeeded. {detail}"}
def mod_post_start_training(enable_finetuning: str, GModel: str, DModel: str):
    """Start training, optionally fine-tuning from existing G/D checkpoints.

    NOTE(review): enable_finetuning is annotated str here but exec_training
    compares it with `== True`; a string payload (e.g. "true") would never
    enable fine-tuning -- confirm what the client actually sends.
    """
    print("START_TRAINING:::::::", enable_finetuning, GModel, DModel)
    res = exec_training(enable_finetuning, GModel, DModel)
    if res[0] == ERROR:
        return {"result": "failed", "detail": f"Start training failed. {res[1]}"}
    return {"result": "success", "detail": f"Start training succeeded. {res[1]}"}
def mod_post_stop_training():
    """Stop a running training session."""
    code, detail = stop_training()
    if code == ERROR:
        return {"result": "failed", "detail": f"Stop training failed. {detail}"}
    return {"result": "success", "detail": f"Stop training succeeded. {detail}"}
### DEBUG
def mod_get_related_files():
    """Return name/size/content of training-related files (debug endpoint).

    Contents are only inlined for files smaller than 1 MiB; larger files
    get an empty "data" field.
    """
    targets = get_file_list(os.path.join(LOG_DIR, "*"))
    targets.extend([
        "MMVC_Trainer/dataset/multi_speaker_correspondence.txt",
        "MMVC_Trainer/train_ms.py",
    ])
    targets.extend(get_file_list("MMVC_Trainer/configs/*"))

    entries = []
    for path in targets:
        nbytes = os.path.getsize(path)
        body = ""
        if nbytes < 1024 * 1024:
            with open(path, "r") as fh:  # renamed from `input` to avoid shadowing the builtin
                body = fh.read()
        entries.append({"name": path, "size": nbytes, "data": body})
    return JSONResponse(content=jsonable_encoder(entries))
def mod_get_tail_training_log(num: int):
    """Return the last `num` lines of the training log.

    The first sync_exec strips carriage-return progress rewrites (keeps
    only the text after the last \r on each line) into /tmp/out; its
    result is deliberately discarded and overwritten by the tail result.
    """
    training_log_file = os.path.join(LOG_DIR, "training.txt")
    res = sync_exec(f"cat {training_log_file} | sed -e 's/.*\r//' > /tmp/out", "/dev/null")
    cmd = f'tail -n {num} /tmp/out'
    # /tmp/out is absolute, so the MMVC_Trainer cwd inside
    # sync_exec_with_stdout does not affect the path.
    res = sync_exec_with_stdout(cmd, "/dev/null")
    if res[0] == ERROR:
        return {"result": "failed", "detail": f"Tail training log failed. {res[1]}"}
    return {"result": "success", "detail": res[1]}

View File

@ -1,26 +0,0 @@
import os
import glob
# def get_file_list(top_dir):
# for root, dirs, files in os.walk(top_dir):
# for dir in dirs:
# dirPath = os.path.join(root, dir)
# print(f'dirPath = {dirPath}')
# for file in files:
# filePath = os.path.join(root, file)
# print(f'filePath = {filePath}')
def get_dir_list(top_dir):
    """Return the names (not full paths) of the immediate subdirectories of top_dir."""
    return [
        entry
        for entry in os.listdir(top_dir)
        if os.path.isdir(os.path.join(top_dir, entry))
    ]
def get_file_list(top_dir):
    """Glob for paths matching the given pattern (despite the parameter name)."""
    matches = glob.glob(top_dir)
    return matches

View File

@ -0,0 +1,165 @@
import json
import os
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Tuple
from const import RVCSampleMode, getSampleJsonAndModelIds
from data.ModelSample import ModelSamples, generateModelSample
from data.ModelSlot import RVCModelSlot, loadSlotInfo, saveSlotInfo
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
from utils.downloader.Downloader import download, download_no_tqdm
def downloadInitialSamples(mode: RVCSampleMode, model_dir: str):
    """Download the sample catalogs and seed model_dir with the initial samples.

    When model_dir already exists the slots are assumed populated, so only
    the catalogs are fetched and the model download is skipped.
    """
    sampleJsonUrls, sampleModels = getSampleJsonAndModelIds(mode)
    sampleJsons = _downloadSampleJsons(sampleJsonUrls)
    if os.path.exists(model_dir):
        # Typo fix in the log message: "skil" -> "skip".
        print("[Voice Changer] model_dir is already exists. skip download samples.")
        return
    samples = _generateSampleList(sampleJsons)
    slotIndex = list(range(len(sampleModels)))
    _downloadSamples(samples, sampleModels, model_dir, slotIndex)
def downloadSample(mode: RVCSampleMode, modelId: str, model_dir: str, slotIndex: int, params: Any):
    """Download a single sample model into the given slot."""
    sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
    catalog = _generateSampleList(_generateSampleJsons(sampleJsonUrls))
    _downloadSamples(catalog, [(modelId, params)], model_dir, [slotIndex])
def getSampleInfos(mode: RVCSampleMode):
    """Return the parsed sample catalog for the given sample mode."""
    sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
    return _generateSampleList(_generateSampleJsons(sampleJsonUrls))
def _downloadSampleJsons(sampleJsonUrls: list[str]):
    """Fetch each catalog json into the cwd and return the local filenames."""
    filenames = []
    for url in sampleJsonUrls:
        name = os.path.basename(url)
        download_no_tqdm({"url": url, "saveTo": name, "position": 0})
        filenames.append(name)
    return filenames
def _generateSampleJsons(sampleJsonUrls: list[str]):
sampleJsons = []
for url in sampleJsonUrls:
filename = os.path.basename(url)
sampleJsons.append(filename)
return sampleJsons
def _generateSampleList(sampleJsons: list[str]):
    """Parse catalog json files into one flat list of ModelSamples."""
    samples: list[ModelSamples] = []
    for path in sampleJsons:
        with open(path, "r", encoding="utf-8") as f:
            catalog = json.load(f)
        # Each top-level key is a voice-changer type mapping to its entries.
        for entries in catalog.values():
            for sampleParams in entries:
                samples.append(generateModelSample(sampleParams))
    return samples
def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str, Any]], model_dir: str, slotIndex: list[int]):
    """Download model/index/icon files for each requested sample id and write
    the resulting slot metadata under model_dir.

    samples        -- parsed catalog entries to search
    sampleModelIds -- (sampleId, params) pairs to install
    slotIndex      -- target slot number for each pair (parallel list)
    """
    downloadParams = []
    line_num = 0  # progress-bar row assigned to each queued download
    for i, initSampleId in enumerate(sampleModelIds):
        targetSampleId = initSampleId[0]
        targetSampleParams = initSampleId[1]
        tagetSlotIndex = slotIndex[i]

        # Search the catalog for the requested sample id.
        match = False
        for sample in samples:
            print("sample", sample)
            if sample.id == targetSampleId:
                match = True
                break
        if match is False:
            print(f"[Voice Changer] initiail sample not found. {targetSampleId}")
            continue

        # Found: queue its files for download and build the slot metadata.
        slotDir = os.path.join(model_dir, str(tagetSlotIndex))
        if sample.voiceChangerType == "RVC":
            slotInfo: RVCModelSlot = RVCModelSlot()
            os.makedirs(slotDir, exist_ok=True)
            modelFilePath = os.path.join(
                slotDir,
                os.path.basename(sample.modelUrl),
            )
            downloadParams.append(
                {
                    "url": sample.modelUrl,
                    "saveTo": modelFilePath,
                    "position": line_num,
                }
            )
            slotInfo.modelFile = modelFilePath
            line_num += 1

            # Optional faiss index -- only when the caller asked for it.
            if targetSampleParams["useIndex"] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
                indexPath = os.path.join(
                    slotDir,
                    os.path.basename(sample.indexUrl),
                )
                downloadParams.append(
                    {
                        "url": sample.indexUrl,
                        "saveTo": indexPath,
                        "position": line_num,
                    }
                )
                slotInfo.indexFile = indexPath
                line_num += 1

            # Optional icon image.
            if hasattr(sample, "icon") and sample.icon != "":
                iconPath = os.path.join(
                    slotDir,
                    os.path.basename(sample.icon),
                )
                downloadParams.append(
                    {
                        "url": sample.icon,
                        "saveTo": iconPath,
                        "position": line_num,
                    }
                )
                slotInfo.iconFile = iconPath
                line_num += 1

            slotInfo.sampleId = sample.id
            slotInfo.credit = sample.credit
            slotInfo.description = sample.description
            slotInfo.name = sample.name
            slotInfo.termsOfUseUrl = sample.termsOfUseUrl
            slotInfo.defaultTune = 0
            slotInfo.defaultIndexRatio = 1
            slotInfo.defaultProtect = 0.5
            slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
            saveSlotInfo(model_dir, tagetSlotIndex, slotInfo)

    # Download everything concurrently.
    print("[Voice Changer] Downloading model files...")
    with ThreadPoolExecutor() as pool:
        pool.map(download, downloadParams)

    # Re-load each slot and fill in the model-derived metadata.
    # NOTE(review): this iterates all of slotIndex, including slots whose
    # sample id was not found above -- confirm loadSlotInfo tolerates that.
    print("[Voice Changer] Generating metadata...")
    for targetSlotIndex in slotIndex:
        slotInfo = loadSlotInfo(model_dir, targetSlotIndex)
        if slotInfo.voiceChangerType == "RVC":
            if slotInfo.isONNX:
                _setInfoByONNX(slotInfo)
            else:
                _setInfoByPytorch(slotInfo)
            saveSlotInfo(model_dir, targetSlotIndex, slotInfo)

View File

@ -0,0 +1,64 @@
import os
from concurrent.futures import ThreadPoolExecutor
from utils.downloader.Downloader import download
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from Exceptions import WeightDownladException
def downloadWeight(voiceChangerParams: VoiceChangerParams):
    """Ensure the embedder/vocoder weight files exist, downloading any that
    are missing, then verify that all of them are present.

    Raises WeightDownladException when any required file is still missing
    after the download pass.
    """
    hubert_base = voiceChangerParams.hubert_base
    hubert_base_jp = voiceChangerParams.hubert_base_jp
    hubert_soft = voiceChangerParams.hubert_soft
    nsf_hifigan = voiceChangerParams.nsf_hifigan
    # The vocoder config lives next to the vocoder weight file.
    nsf_hifigan_config = os.path.join(os.path.dirname(nsf_hifigan), "config.json")

    # (url, destination) per required weight; the list order fixes each
    # download's progress-bar row, matching the original 0..4 positions.
    weightTargets = [
        ("https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt", hubert_base),
        ("https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt", hubert_base_jp),
        ("https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt", hubert_soft),
        ("https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin", nsf_hifigan),
        ("https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json", nsf_hifigan_config),
    ]

    # file exists check (currently only for rvc)
    downloadParams = [
        {"url": url, "saveTo": dest, "position": position}
        for position, (url, dest) in enumerate(weightTargets)
        if not os.path.exists(dest)
    ]

    with ThreadPoolExecutor() as pool:
        pool.map(download, downloadParams)

    # Fail hard if anything is still missing after the download pass.
    if any(not os.path.exists(dest) for _, dest in weightTargets):
        raise WeightDownladException()

View File

@ -0,0 +1,134 @@
import time
import numpy as np
import librosa
import sounddevice as sd
from voice_changer.Local.AudioDeviceList import ServerAudioDevice
from voice_changer.VoiceChanger import VoiceChanger
from voice_changer.utils.Timer import Timer
class ServerDevice:
    """Drives server-side audio I/O: reads microphone input via sounddevice,
    runs it through the VoiceChanger, and writes the result to the output
    device.
    """

    def __init__(self):
        # Set later by serverLocal(); the callback is a no-op until then.
        self.voiceChanger: VoiceChanger | None = None
        pass

    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
        """sounddevice stream callback: convert one block of audio in place.

        indata/outdata are float32; the voice changer works on an
        int16-scaled mono signal, hence the *32768 / /32768 conversions.
        Exceptions are swallowed (with a log) so the stream keeps running.
        """
        if self.voiceChanger is None:
            print("[Voice Changer] voiceChanger is None")
            return
        try:
            indata = indata * self.voiceChanger.settings.serverInputAudioGain
            with Timer("all_inference_time") as t:
                unpackedData = librosa.to_mono(indata.T) * 32768.0
                # NOTE: `times` (callback timing info) is rebound to the
                # per-stage timing list returned by on_request.
                out_wav, times = self.voiceChanger.on_request(unpackedData)
                outputChunnels = outdata.shape[1]
                # Duplicate the mono result across all output channels.
                outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
                outdata[:] = outdata * self.voiceChanger.settings.serverOutputAudioGain
            all_inference_time = t.secs
            performance = [all_inference_time] + times
            if self.voiceChanger.emitTo is not None:
                self.voiceChanger.emitTo(performance)
            # Stored in milliseconds for the UI.
            self.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
        except Exception as e:
            print("[Voice Changer] ex:", e)

    def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
        """Return the device whose .index matches, or None if absent."""
        serverAudioDevice = [x for x in audioDeviceList if x.index == index]
        if len(serverAudioDevice) > 0:
            return serverAudioDevice[0]
        else:
            return None

    def serverLocal(self, _vc: VoiceChanger):
        """Blocking supervision loop.

        (Re)opens the audio stream whenever the selected devices, chunk
        size, or model sampling rate change, and idles (2 s polls) while
        server audio is disabled.  Never returns.
        """
        self.voiceChanger = _vc
        vc = self.voiceChanger

        currentInputDeviceId = -1
        currentModelSamplingRate = -1
        currentOutputDeviceId = -1
        currentInputChunkNum = -1
        while True:
            if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc is None:
                # Server audio disabled: park at the client default rate.
                vc.settings.inputSampleRate = 48000
                time.sleep(2)
            else:
                # Re-initialize PortAudio so device changes are picked up.
                sd._terminate()
                sd._initialize()
                sd.default.device[0] = vc.settings.serverInputDeviceId
                currentInputDeviceId = vc.settings.serverInputDeviceId
                sd.default.device[1] = vc.settings.serverOutputDeviceId
                currentOutputDeviceId = vc.settings.serverOutputDeviceId
                # NOTE(review): this assignment of the device *list* looks
                # vestigial -- it is overwritten with maxInputChannels below.
                currentInputChannelNum = vc.settings.serverAudioInputDevices

                serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
                serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
                print(serverInputAudioDevice, serverOutputAudioDevice)
                if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                    time.sleep(2)
                    print("serverInputAudioDevice or serverOutputAudioDevice is None")
                    continue

                currentInputChannelNum = serverInputAudioDevice.maxInputChannels
                currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels

                currentInputChunkNum = vc.settings.serverReadChunkSize
                block_frame = currentInputChunkNum * 128  # frames per callback

                # sample rate precheck(alsa cannot use 40000?)
                try:
                    currentModelSamplingRate = self.voiceChanger.voiceChangerModel.get_processing_sampling_rate()
                except Exception as e:
                    print("[Voice Changer] ex: get_processing_sampling_rate", e)
                    continue
                try:
                    # Probe: open/close a stream at the model's rate to see
                    # whether the devices accept it.
                    with sd.Stream(
                        callback=self.audio_callback,
                        blocksize=block_frame,
                        samplerate=currentModelSamplingRate,
                        dtype="float32",
                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        pass
                    vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
                    vc.settings.inputSampleRate = currentModelSamplingRate
                    print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
                except Exception as e:
                    # Model rate rejected: fall back to the device default.
                    print(
                        "[Voice Changer] ex: fallback to device default samplerate",
                        e,
                    )
                    vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
                    vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate

                # main loop
                try:
                    with sd.Stream(
                        callback=self.audio_callback,
                        blocksize=block_frame,
                        samplerate=vc.settings.serverInputAudioSampleRate,
                        dtype="float32",
                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        # Stay in the stream until the user stops server
                        # audio or any relevant setting changes; log status
                        # every 2 seconds while streaming.
                        while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.voiceChangerModel.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
                            time.sleep(2)
                            print(
                                "[Voice Changer] server audio",
                                vc.settings.performance,
                            )
                            print(
                                "[Voice Changer] info:",
                                vc.settings.serverAudioStated,
                                currentInputDeviceId,
                                currentOutputDeviceId,
                                vc.settings.serverInputAudioSampleRate,
                                currentInputChunkNum,
                            )
                except Exception as e:
                    print("[Voice Changer] ex:", e)
                    time.sleep(2)

View File

@ -3,26 +3,26 @@ from const import EnumInferenceTypes, EnumEmbedderTypes
from dataclasses import dataclass from dataclasses import dataclass
@dataclass # @dataclass
class ModelSlot: # class ModelSlot:
modelFile: str = "" # modelFile: str = ""
indexFile: str = "" # indexFile: str = ""
defaultTune: int = 0 # defaultTune: int = 0
defaultIndexRatio: int = 1 # defaultIndexRatio: int = 1
defaultProtect: float = 0.5 # defaultProtect: float = 0.5
isONNX: bool = False # isONNX: bool = False
modelType: str = EnumInferenceTypes.pyTorchRVC.value # modelType: str = EnumInferenceTypes.pyTorchRVC.value
samplingRate: int = -1 # samplingRate: int = -1
f0: bool = True # f0: bool = True
embChannels: int = 256 # embChannels: int = 256
embOutputLayer: int = 9 # embOutputLayer: int = 9
useFinalProj: bool = True # useFinalProj: bool = True
deprecated: bool = False # deprecated: bool = False
embedder: str = EnumEmbedderTypes.hubert.value # embedder: str = EnumEmbedderTypes.hubert.value
name: str = "" # name: str = ""
description: str = "" # description: str = ""
credit: str = "" # credit: str = ""
termsOfUseUrl: str = "" # termsOfUseUrl: str = ""
sampleId: str = "" # sampleId: str = ""
iconFile: str = "" # iconFile: str = ""

View File

@ -1,9 +1,45 @@
from const import EnumEmbedderTypes, EnumInferenceTypes from const import UPLOAD_DIR, EnumEmbedderTypes, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
import torch import torch
import onnxruntime import onnxruntime
import json import json
import os
import shutil
from data.ModelSlot import ModelSlot, RVCModelSlot, saveSlotInfo
def setSlotAsRVC(model_dir: str, slot: int, paramDict):
slotInfo: RVCModelSlot = RVCModelSlot()
slotDir = os.path.join(model_dir, str(slot))
os.makedirs(slotDir, exist_ok=True)
print("RVC SLot Load", slot, paramDict)
for f in paramDict["files"]:
srcPath = os.path.join(UPLOAD_DIR, f["name"])
dstPath = os.path.join(slotDir, f["name"])
if f["kind"] == "rvcModel":
slotInfo.modelFile = dstPath
slotInfo.name = os.path.splitext(f["name"])[0]
elif f["kind"] == "rvcIndex":
slotInfo.indexFile = dstPath
else:
print(f"[Voice Changer] unknown file kind {f['kind']}")
shutil.move(srcPath, dstPath)
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 1
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
saveSlotInfo(model_dir, slot, slotInfo)
print("[Voice Changer] new model added:", slotInfo)
def _setInfoByPytorch(slot: ModelSlot): def _setInfoByPytorch(slot: ModelSlot):
@ -15,22 +51,14 @@ def _setInfoByPytorch(slot: ModelSlot):
slot.f0 = True if cpt["f0"] == 1 else False slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1") version = cpt.get("version", "v1")
if version is None or version == "v1": if version is None or version == "v1":
slot.modelType = ( slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
EnumInferenceTypes.pyTorchRVC.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCNono.value
)
slot.embChannels = 256 slot.embChannels = 256
slot.embOutputLayer = 9 slot.embOutputLayer = 9
slot.useFinalProj = True slot.useFinalProj = True
slot.embedder = EnumEmbedderTypes.hubert.value slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v1") print("[Voice Changer] Official Model(pyTorch) : v1")
else: else:
slot.modelType = ( slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
EnumInferenceTypes.pyTorchRVCv2.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCv2Nono.value
)
slot.embChannels = 768 slot.embChannels = 768
slot.embOutputLayer = 12 slot.embOutputLayer = 12
slot.useFinalProj = False slot.useFinalProj = False
@ -40,37 +68,21 @@ def _setInfoByPytorch(slot: ModelSlot):
else: else:
# DDPN RVC # DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = ( slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
EnumInferenceTypes.pyTorchWebUI.value
if slot.f0
else EnumInferenceTypes.pyTorchWebUINono.value
)
slot.embChannels = cpt["config"][17] slot.embChannels = cpt["config"][17]
slot.embOutputLayer = ( slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
)
if slot.embChannels == 256: if slot.embChannels == 256:
slot.useFinalProj = True slot.useFinalProj = True
else: else:
slot.useFinalProj = False slot.useFinalProj = False
# DDPNモデルの情報を表示 # DDPNモデルの情報を表示
if ( if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like") print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
elif ( elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like") print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
else: else:
print( print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
slot.embedder = cpt["embedder_name"] slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"): if slot.embedder.endswith("768"):
@ -91,9 +103,8 @@ def _setInfoByPytorch(slot: ModelSlot):
def _setInfoByONNX(slot: ModelSlot): def _setInfoByONNX(slot: ModelSlot):
tmp_onnx_session = onnxruntime.InferenceSession( print("......................................_setInfoByONNX")
slot.modelFile, providers=["CPUExecutionProvider"] tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
)
modelmeta = tmp_onnx_session.get_modelmeta() modelmeta = tmp_onnx_session.get_modelmeta()
try: try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"]) metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
@ -101,16 +112,8 @@ def _setInfoByONNX(slot: ModelSlot):
# slot.modelType = metadata["modelType"] # slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"] slot.embChannels = metadata["embChannels"]
slot.embOutputLayer = ( slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9 slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
)
slot.useFinalProj = (
metadata["useFinalProj"]
if "useFinalProj" in metadata
else True
if slot.embChannels == 256
else False
)
if slot.embChannels == 256: if slot.embChannels == 256:
slot.useFinalProj = True slot.useFinalProj = True
@ -118,22 +121,12 @@ def _setInfoByONNX(slot: ModelSlot):
slot.useFinalProj = False slot.useFinalProj = False
# ONNXモデルの情報を表示 # ONNXモデルの情報を表示
if ( if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
print("[Voice Changer] ONNX Model: Official v1 like") print("[Voice Changer] ONNX Model: Official v1 like")
elif ( elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
print("[Voice Changer] ONNX Model: Official v2 like") print("[Voice Changer] ONNX Model: Official v2 like")
else: else:
print( print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
if "embedder" not in metadata: if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert.value slot.embedder = EnumEmbedderTypes.hubert.value
@ -149,11 +142,9 @@ def _setInfoByONNX(slot: ModelSlot):
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") # raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"] slot.f0 = metadata["f0"]
slot.modelType = ( print("slot.modelType1", slot.modelType)
EnumInferenceTypes.onnxRVC.value slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
if slot.f0 print("slot.modelType2", slot.modelType)
else EnumInferenceTypes.onnxRVCNono.value
)
slot.samplingRate = metadata["samplingRate"] slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False slot.deprecated = False

View File

@ -1,14 +1,12 @@
import sys import sys
import os import os
from dataclasses import asdict from dataclasses import dataclass, asdict
from typing import cast from typing import cast
import numpy as np import numpy as np
import torch import torch
import torchaudio import torchaudio
from ModelSample import getModelSamples from data.ModelSlot import loadSlotInfo
from voice_changer.RVC.ModelSlot import ModelSlot from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
from voice_changer.RVC.SampleDownloader import downloadModelFiles
# avoiding parse arg error in RVC # avoiding parse arg error in RVC
sys.argv = ["MMVCServerSIO.py"] sys.argv = ["MMVCServerSIO.py"]
@ -31,7 +29,6 @@ from voice_changer.RVC.ModelSlotGenerator import (
) )
from voice_changer.RVC.RVCSettings import RVCSettings from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.VoiceChangerModel import AudioInOut from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
@ -40,147 +37,104 @@ from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.pipeline.Pipeline import Pipeline from voice_changer.RVC.pipeline.Pipeline import Pipeline
from Exceptions import DeviceCannotSupportHalfPrecisionException, NoModeLoadedException from Exceptions import DeviceCannotSupportHalfPrecisionException
from const import ( from const import (
RVC_MODEL_DIRNAME,
UPLOAD_DIR, UPLOAD_DIR,
getRVCSampleJsonAndModelIds,
) )
import shutil import shutil
import json import json
class RVC: class RVC:
initialLoad: bool = True # initialLoad: bool = True
settings: RVCSettings = RVCSettings() # currentSlot: int = 0
# needSwitch: bool = False
pipeline: Pipeline | None = None def __init__(self, slotIndex: int, params: VoiceChangerParams):
self.params: VoiceChangerParams = params
deviceManager = DeviceManager.get_instance() self.settings: RVCSettings = RVCSettings()
self.deviceManager = DeviceManager.get_instance()
audio_buffer: AudioInOut | None = None
prevVol: float = 0
params: VoiceChangerParams
currentSlot: int = 0
needSwitch: bool = False
def __init__(self, params: VoiceChangerParams):
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector
)
self.params = params
EmbedderManager.initialize(params) EmbedderManager.initialize(params)
self.loadSlots()
print("[Voice Changer] RVC initialization: ", params)
# サンプルカタログ作成 self.pipeline: Pipeline | None = None
sampleJsons: list[str] = [] self.pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
sampleJsonUrls, _sampleModels = getRVCSampleJsonAndModelIds(
params.rvc_sample_mode
)
for url in sampleJsonUrls:
filename = os.path.basename(url)
sampleJsons.append(filename)
sampleModels = getModelSamples(sampleJsons, "RVC")
if sampleModels is not None:
self.settings.sampleModels = sampleModels
# 起動時にスロットにモデルがある場合はロードしておく self.audio_buffer: AudioInOut | None = None
if len(self.settings.modelSlots) > 0:
for i, slot in enumerate(self.settings.modelSlots): print("[Voice Changer] RVC Slot initialization. global params:", params)
if len(slot.modelFile) > 0:
self.prepareModel(i) self.slotIndex = slotIndex
self.settings.modelSlotIndex = i self.slotInfo: RVCSlotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
self.switchModel(self.settings.modelSlotIndex)
self.initialLoad = False
break
self.prevVol = 0.0 self.prevVol = 0.0
print("[Voice Changer] RVC Slot initialization. slot info:", self.slotInfo)
def getSampleInfo(self, id: str): # def loadModel(self, props: LoadModelParams):
sampleInfos = list(filter(lambda x: x.id == id, self.settings.sampleModels)) # target_slot_idx = props.slot
if len(sampleInfos) > 0: # params = props.params
return sampleInfos[0] # slotInfo: ModelSlot = ModelSlot()
else:
None
def moveToModelDir(self, file: str, dstDir: str): # print("loadModel", params)
dst = os.path.join(dstDir, os.path.basename(file)) # # サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
if os.path.exists(dst): # if len(params["sampleId"]) > 0:
os.remove(dst) # sampleId = params["sampleId"]
shutil.move(file, dst) # sampleInfo = self.getSampleInfo(sampleId)
return dst # useIndex = params["rvcIndexDownload"]
def loadModel(self, props: LoadModelParams): # if sampleInfo is None:
target_slot_idx = props.slot # print("[Voice Changer] sampleInfo is None")
params = props.params # return
slotInfo: ModelSlot = ModelSlot() # modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
# slotInfo.modelFile = modelPath
# if indexPath is not None:
# slotInfo.indexFile = indexPath
# if iconPath is not None:
# slotInfo.iconFile = iconPath
print("loadModel", params) # slotInfo.sampleId = sampleInfo.id
# サンプルが指定されたときはダウンロードしてメタデータをでっちあげる # slotInfo.credit = sampleInfo.credit
if len(params["sampleId"]) > 0: # slotInfo.description = sampleInfo.description
sampleId = params["sampleId"] # slotInfo.name = sampleInfo.name
sampleInfo = self.getSampleInfo(sampleId) # slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
useIndex = params["rvcIndexDownload"]
if sampleInfo is None: # # slotInfo.samplingRate = sampleInfo.sampleRate
print("[Voice Changer] sampleInfo is None") # # slotInfo.modelType = sampleInfo.modelType
return # # slotInfo.f0 = sampleInfo.f0
modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex) # else:
slotInfo.modelFile = modelPath # slotInfo.modelFile = params["files"]["rvcModel"]
if indexPath is not None: # slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
slotInfo.indexFile = indexPath
if iconPath is not None:
slotInfo.iconFile = iconPath
slotInfo.sampleId = sampleInfo.id # slotInfo.defaultTune = params["defaultTune"]
slotInfo.credit = sampleInfo.credit # slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
slotInfo.description = sampleInfo.description # slotInfo.defaultProtect = params["defaultProtect"]
slotInfo.name = sampleInfo.name # slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
# slotInfo.samplingRate = sampleInfo.sampleRate # if slotInfo.isONNX:
# slotInfo.modelType = sampleInfo.modelType # _setInfoByONNX(slotInfo)
# slotInfo.f0 = sampleInfo.f0 # else:
else: # _setInfoByPytorch(slotInfo)
slotInfo.modelFile = params["files"]["rvcModel"]
slotInfo.indexFile = (
params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
)
slotInfo.defaultTune = params["defaultTune"] # # メタデータを見て、永続化モデルフォルダに移動させる
slotInfo.defaultIndexRatio = params["defaultIndexRatio"] # # その際に、メタデータのファイル格納場所も書き換える
slotInfo.defaultProtect = params["defaultProtect"] # slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx))
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx") # os.makedirs(slotDir, exist_ok=True)
# slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
# if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
# slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
# if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
# slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
# json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
# self.loadSlots()
if slotInfo.isONNX: # # 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
_setInfoByONNX(slotInfo) # if self.initialLoad:
else: # self.prepareModel(target_slot_idx)
_setInfoByPytorch(slotInfo) # self.settings.modelSlotIndex = target_slot_idx
# self.switchModel(self.settings.modelSlotIndex)
# self.initialLoad = False
# elif target_slot_idx == self.currentSlot:
# self.prepareModel(target_slot_idx)
# メタデータを見て、永続化モデルフォルダに移動させる # return self.get_info()
# その際に、メタデータのファイル格納場所も書き換える
slotDir = os.path.join(
self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx)
)
os.makedirs(slotDir, exist_ok=True)
slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
self.loadSlots()
# 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
if self.initialLoad:
self.prepareModel(target_slot_idx)
self.settings.modelSlotIndex = target_slot_idx
self.switchModel(self.settings.modelSlotIndex)
self.initialLoad = False
elif target_slot_idx == self.currentSlot:
self.prepareModel(target_slot_idx)
return self.get_info()
def loadSlots(self): def loadSlots(self):
dirname = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME) dirname = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME)
@ -189,9 +143,7 @@ class RVC:
modelSlots: list[ModelSlot] = [] modelSlots: list[ModelSlot] = []
for slot_idx in range(len(self.settings.modelSlots)): for slot_idx in range(len(self.settings.modelSlots)):
slotDir = os.path.join( slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx))
self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx)
)
jsonDict = os.path.join(slotDir, "params.json") jsonDict = os.path.join(slotDir, "params.json")
if os.path.exists(jsonDict): if os.path.exists(jsonDict):
jsonDict = json.load(open(os.path.join(slotDir, "params.json"))) jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
@ -205,76 +157,56 @@ class RVC:
if key in self.settings.intData: if key in self.settings.intData:
# 設定前処理 # 設定前処理
val = cast(int, val) val = cast(int, val)
if key == "modelSlotIndex":
if val < 0:
return True
val = val % 1000 # Quick hack for same slot is selected
if (
self.settings.modelSlots[val].modelFile is None
or self.settings.modelSlots[val].modelFile == ""
):
print("[Voice Changer] slot does not have model.")
return True
self.prepareModel(val)
# 設定 # 設定
setattr(self.settings, key, val) setattr(self.settings, key, val)
if key == "gpu": if key == "gpu":
self.deviceManager.setForceTensor(False) self.deviceManager.setForceTensor(False)
self.prepareModel(self.settings.modelSlotIndex) self.prepareModel()
elif key in self.settings.floatData: elif key in self.settings.floatData:
setattr(self.settings, key, float(val)) setattr(self.settings, key, float(val))
elif key in self.settings.strData: elif key in self.settings.strData:
setattr(self.settings, key, str(val)) setattr(self.settings, key, str(val))
if key == "f0Detector" and self.pipeline is not None: if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor( pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
self.settings.f0Detector
)
self.pipeline.setPitchExtractor(pitchExtractor) self.pipeline.setPitchExtractor(pitchExtractor)
else: else:
return False return False
return True return True
def prepareModel(self, slot: int): def prepareModel(self):
if slot < 0: print("[Voice Changer] Prepare Model of slot:", self.slotIndex)
print("[Voice Changer] Prepare Model of slot skip:", slot)
return self.get_info()
modelSlot = self.settings.modelSlots[slot]
print("[Voice Changer] Prepare Model of slot:", slot)
# pipelineの生成 # pipelineの生成
self.next_pipeline = createPipeline( self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
modelSlot, self.settings.gpu, self.settings.f0Detector
)
# その他の設定 # # その他の設定
self.next_trans = modelSlot.defaultTune # self.next_trans = modelSlot.defaultTune
self.next_index_ratio = modelSlot.defaultIndexRatio # self.next_index_ratio = modelSlot.defaultIndexRatio
self.next_protect = modelSlot.defaultProtect # self.next_protect = modelSlot.defaultProtect
self.next_samplingRate = modelSlot.samplingRate # self.next_samplingRate = modelSlot.samplingRate
self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch" # self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
# self.needSwitch = True # # self.needSwitch = True
print("[Voice Changer] Prepare done.") # print("[Voice Changer] Prepare done.")
self.switchModel(slot) # self.switchModel(slot)
return self.get_info() return self.get_info()
def switchModel(self, slot: int): # def switchModel(self, slot: int):
print("[Voice Changer] Switching model..") # print("[Voice Changer] Switching model..")
self.pipeline = self.next_pipeline # self.pipeline = self.next_pipeline
self.settings.tran = self.next_trans # self.settings.tran = self.next_trans
self.settings.indexRatio = self.next_index_ratio # self.settings.indexRatio = self.next_index_ratio
self.settings.protect = self.next_protect # self.settings.protect = self.next_protect
self.settings.modelSamplingRate = self.next_samplingRate # self.settings.modelSamplingRate = self.next_samplingRate
self.settings.framework = self.next_framework # self.settings.framework = self.next_framework
# self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない # # self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない
self.currentSlot = slot # self.currentSlot = slot
print( # print(
"[Voice Changer] Switching model..done", # "[Voice Changer] Switching model..done",
) # )
def get_info(self): def get_info(self):
data = asdict(self.settings) data = asdict(self.settings)
@ -293,9 +225,7 @@ class RVC:
crossfadeSize: int, crossfadeSize: int,
solaSearchFrame: int = 0, solaSearchFrame: int = 0,
): ):
newData = ( newData = newData.astype(np.float32) / 32768.0 # RVCのモデルのサンプリングレートで入ってきている。extraDataLength, Crossfade等も同じSRで処理(★1)
newData.astype(np.float32) / 32768.0
) # RVCのモデルのサンプリングレートで入ってきている。extraDataLength, Crossfade等も同じSRで処理(★1)
if self.audio_buffer is not None: if self.audio_buffer is not None:
# 過去のデータに連結 # 過去のデータに連結
@ -303,18 +233,14 @@ class RVC:
else: else:
self.audio_buffer = newData self.audio_buffer = newData
convertSize = ( convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
)
if convertSize % 128 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。 if convertSize % 128 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
convertSize = convertSize + (128 - (convertSize % 128)) convertSize = convertSize + (128 - (convertSize % 128))
# バッファがたまっていない場合はzeroで補う # バッファがたまっていない場合はzeroで補う
if self.audio_buffer.shape[0] < convertSize: if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate( self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
[np.zeros([convertSize]), self.audio_buffer]
)
convertOffset = -1 * convertSize convertOffset = -1 * convertSize
self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出 self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出
@ -324,9 +250,7 @@ class RVC:
else: else:
device = torch.device("cpu") device = torch.device("cpu")
audio_buffer = torch.from_numpy(self.audio_buffer).to( audio_buffer = torch.from_numpy(self.audio_buffer).to(device=device, dtype=torch.float32)
device=device, dtype=torch.float32
)
# 出力部分だけ切り出して音量を確認。(TODO:段階的消音にする) # 出力部分だけ切り出して音量を確認。(TODO:段階的消音にする)
cropOffset = -1 * (inputSize + crossfadeSize) cropOffset = -1 * (inputSize + crossfadeSize)
@ -339,21 +263,14 @@ class RVC:
return (audio_buffer, convertSize, vol) return (audio_buffer, convertSize, vol)
def inference(self, data): def inference(self, data):
if self.settings.modelSlotIndex < 0: # if self.settings.modelSlotIndex < 0:
print(
"[Voice Changer] wait for loading model...",
self.settings.modelSlotIndex,
self.currentSlot,
)
raise NoModeLoadedException("model_common")
# if self.needSwitch:
# print( # print(
# f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}" # "[Voice Changer] wait for loading model...",
# self.settings.modelSlotIndex,
# self.currentSlot,
# ) # )
# self.switchModel() # raise NoModeLoadedException("model_common")
# self.needSwitch = False
# half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu)
half = self.pipeline.isHalf half = self.pipeline.isHalf
audio = data[0] audio = data[0]
@ -363,18 +280,16 @@ class RVC:
if vol < self.settings.silentThreshold: if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16) return np.zeros(convertSize).astype(np.int16)
audio = torchaudio.functional.resample( audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99)
audio, self.settings.modelSamplingRate, 16000, rolloff=0.99
)
repeat = 3 if half else 1 repeat = 3 if half else 1
repeat *= self.settings.rvcQuality # 0 or 3 repeat *= self.settings.rvcQuality # 0 or 3
sid = 0 sid = 0
f0_up_key = self.settings.tran f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio index_rate = self.settings.indexRatio
protect = self.settings.protect protect = self.settings.protect
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0 if_f0 = 1 if self.slotInfo.f0 else 0
embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer embOutputLayer = self.slotInfo.embOutputLayer
useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj useFinalProj = self.slotInfo.useFinalProj
try: try:
audio_out = self.pipeline.exec( audio_out = self.pipeline.exec(
sid, sid,
@ -382,8 +297,7 @@ class RVC:
f0_up_key, f0_up_key,
index_rate, index_rate,
if_f0, if_f0,
self.settings.extraConvertSize self.settings.extraConvertSize / self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
/ self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
embOutputLayer, embOutputLayer,
useFinalProj, useFinalProj,
repeat, repeat,
@ -393,9 +307,7 @@ class RVC:
return result return result
except DeviceCannotSupportHalfPrecisionException as e: except DeviceCannotSupportHalfPrecisionException as e:
print( print("[Device Manager] Device cannot support half precision. Fallback to float....")
"[Device Manager] Device cannot support half precision. Fallback to float...."
)
self.deviceManager.setForceTensor(True) self.deviceManager.setForceTensor(True)
self.prepareModel(self.settings.modelSlotIndex) self.prepareModel(self.settings.modelSlotIndex)
raise e raise e
@ -405,7 +317,7 @@ class RVC:
def __del__(self): def __del__(self):
del self.pipeline del self.pipeline
# print("---------- REMOVING ---------------") print("---------- REMOVING ---------------")
remove_path = os.path.join("RVC") remove_path = os.path.join("RVC")
sys.path = [x for x in sys.path if x.endswith(remove_path) is False] sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
@ -461,9 +373,7 @@ class RVC:
"sampleId": "", "sampleId": "",
"files": {"rvcModel": storeFile}, "files": {"rvcModel": storeFile},
} }
props: LoadModelParams = LoadModelParams( props: LoadModelParams = LoadModelParams(slot=targetSlot, isHalf=True, params=params)
slot=targetSlot, isHalf=True, params=params
)
self.loadModel(props) self.loadModel(props)
self.prepareModel(targetSlot) self.prepareModel(targetSlot)
self.settings.modelSlotIndex = targetSlot self.settings.modelSlotIndex = targetSlot
@ -471,12 +381,8 @@ class RVC:
def update_model_default(self): def update_model_default(self):
print("[Voice Changer] UPDATE MODEL DEFAULT!!") print("[Voice Changer] UPDATE MODEL DEFAULT!!")
slotDir = os.path.join( slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot))
self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot) params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
)
params = json.load(
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
)
params["defaultTune"] = self.settings.tran params["defaultTune"] = self.settings.tran
params["defaultIndexRatio"] = self.settings.indexRatio params["defaultIndexRatio"] = self.settings.indexRatio
params["defaultProtect"] = self.settings.protect params["defaultProtect"] = self.settings.protect
@ -488,14 +394,10 @@ class RVC:
print("[Voice Changer] UPDATE MODEL INFO", newData) print("[Voice Changer] UPDATE MODEL INFO", newData)
newDataDict = json.loads(newData) newDataDict = json.loads(newData)
try: try:
slotDir = os.path.join( slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"]))
self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"])
)
except Exception as e: except Exception as e:
print("Exception::::", e) print("Exception::::", e)
params = json.load( params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
)
params[newDataDict["key"]] = newDataDict["val"] params[newDataDict["key"]] = newDataDict["val"]
json.dump(params, open(os.path.join(slotDir, "params.json"), "w")) json.dump(params, open(os.path.join(slotDir, "params.json"), "w"))
self.loadSlots() self.loadSlots()
@ -504,9 +406,7 @@ class RVC:
print("[Voice Changer] UPLOAD ASSETS", params) print("[Voice Changer] UPLOAD ASSETS", params)
paramsDict = json.loads(params) paramsDict = json.loads(params)
uploadPath = os.path.join(UPLOAD_DIR, paramsDict["file"]) uploadPath = os.path.join(UPLOAD_DIR, paramsDict["file"])
storeDir = os.path.join( storeDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"]))
self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"])
)
storePath = os.path.join( storePath = os.path.join(
storeDir, storeDir,
paramsDict["file"], paramsDict["file"],

View File

@ -1,8 +1,4 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from ModelSample import RVCModelSample
from const import RVC_MAX_SLOT_NUM
from voice_changer.RVC.ModelSlot import ModelSlot
@dataclass @dataclass
@ -17,11 +13,6 @@ class RVCSettings:
clusterInferRatio: float = 0.1 clusterInferRatio: float = 0.1
framework: str = "PyTorch" # PyTorch or ONNX framework: str = "PyTorch" # PyTorch or ONNX
modelSlots: list[ModelSlot] = field(
default_factory=lambda: [ModelSlot() for _x in range(RVC_MAX_SLOT_NUM)]
)
sampleModels: list[RVCModelSample] = field(default_factory=lambda: [])
indexRatio: float = 0 indexRatio: float = 0
protect: float = 0.5 protect: float = 0.5

View File

@ -0,0 +1,28 @@
from dataclasses import dataclass
from voice_changer.VoiceChanger import SlotInfo
@dataclass
class RVCSlotInfo(SlotInfo):
modelFile: str = ""
indexFile: str = ""
defaultTune: int = 0
defaultIndexRatio: float = 0
defaultProtect: float = 1
isONNX: bool = False
modelType: str = ""
samplingRate: int = 40000
f0: bool = True
embChannels: int = 256
embOutputLayer: int = 12
useFinalProj: bool = False
deprecated: bool = False
embedder: str = ""
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""

View File

@ -1,174 +1,52 @@
from concurrent.futures import ThreadPoolExecutor # from concurrent.futures import ThreadPoolExecutor
from dataclasses import asdict # from dataclasses import asdict
import os # import os
from const import RVC_MODEL_DIRNAME, TMP_DIR # from const import RVC_MODEL_DIRNAME, TMP_DIR
from Downloader import download, download_no_tqdm # from Downloader import download, download_no_tqdm
from ModelSample import RVCModelSample, getModelSamples # from ModelSample import RVCModelSample, getModelSamples
import json # import json
from voice_changer.RVC.ModelSlot import ModelSlot # from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch # from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
def checkRvcModelExist(model_dir: str): # def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME) # downloadParams = []
if not os.path.exists(rvcModelDir):
return False
return True
# modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
# downloadParams.append(
# {
# "url": sampleInfo.modelUrl,
# "saveTo": modelPath,
# "position": 0,
# }
# )
def downloadInitialSampleModels( # indexPath = None
sampleJsons: list[str], sampleModelIds: list[str], model_dir: str # if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
): # print("[Voice Changer] Download sample with index.")
sampleModels = getModelSamples(sampleJsons, "RVC") # indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
if sampleModels is None: # downloadParams.append(
return # {
# "url": sampleInfo.indexUrl,
# "saveTo": indexPath,
# "position": 1,
# }
# )
downloadParams = [] # iconPath = None
slot_count = 0 # if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
line_num = 0 # iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
for initSampleId in sampleModelIds: # downloadParams.append(
# 初期サンプルをサーチ # {
match = False # "url": sampleInfo.icon,
for sample in sampleModels: # "saveTo": iconPath,
if sample.id == initSampleId[0]: # "position": 2,
match = True # }
break # )
if match is False:
print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
continue
# 検出されたら、、、 # print("[Voice Changer] Downloading model files...", end="")
slotInfo: ModelSlot = ModelSlot() # with ThreadPoolExecutor() as pool:
# sampleParams: Any = {"files": {}} # pool.map(download_no_tqdm, downloadParams)
# print("")
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count)) # return modelPath, indexPath, iconPath
os.makedirs(slotDir, exist_ok=True)
modelFilePath = os.path.join(
slotDir,
os.path.basename(sample.modelUrl),
)
downloadParams.append(
{
"url": sample.modelUrl,
"saveTo": modelFilePath,
"position": line_num,
}
)
slotInfo.modelFile = modelFilePath
line_num += 1
if (
initSampleId[1] is True
and hasattr(sample, "indexUrl")
and sample.indexUrl != ""
):
indexPath = os.path.join(
slotDir,
os.path.basename(sample.indexUrl),
)
downloadParams.append(
{
"url": sample.indexUrl,
"saveTo": indexPath,
"position": line_num,
}
)
slotInfo.indexFile = indexPath
line_num += 1
if hasattr(sample, "icon") and sample.icon != "":
iconPath = os.path.join(
slotDir,
os.path.basename(sample.icon),
)
downloadParams.append(
{
"url": sample.icon,
"saveTo": iconPath,
"position": line_num,
}
)
slotInfo.iconFile = iconPath
line_num += 1
slotInfo.sampleId = sample.id
slotInfo.credit = sample.credit
slotInfo.description = sample.description
slotInfo.name = sample.name
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 1
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
# この時点ではまだファイルはダウンロードされていない
# if slotInfo.isONNX:
# _setInfoByONNX(slotInfo)
# else:
# _setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
slot_count += 1
# ダウンロード
print("[Voice Changer] Downloading model files...")
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)
# メタデータ作成
print("[Voice Changer] Generating metadata...")
for slotId in range(slot_count):
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
slotInfo = ModelSlot(**jsonDict)
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
downloadParams = []
modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
downloadParams.append(
{
"url": sampleInfo.modelUrl,
"saveTo": modelPath,
"position": 0,
}
)
indexPath = None
if (
useIndex is True
and hasattr(sampleInfo, "indexUrl")
and sampleInfo.indexUrl != ""
):
print("[Voice Changer] Download sample with index.")
indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
downloadParams.append(
{
"url": sampleInfo.indexUrl,
"saveTo": indexPath,
"position": 1,
}
)
iconPath = None
if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
downloadParams.append(
{
"url": sampleInfo.icon,
"saveTo": iconPath,
"position": 2,
}
)
print("[Voice Changer] Downloading model files...", end="")
with ThreadPoolExecutor() as pool:
pool.map(download_no_tqdm, downloadParams)
print("")
return modelPath, indexPath, iconPath

View File

@ -14,7 +14,7 @@ class RVCInferencer(Inferencer):
dev = DeviceManager.get_instance().getDevice(gpu) dev = DeviceManager.get_instance().getDevice(gpu)
isHalf = DeviceManager.get_instance().halfPrecisionAvailable(gpu) isHalf = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
print("LLLLLLLLLLLLLLLOOOOOOOOOOOOOOOOOOOOOOO", file)
cpt = torch.load(file, map_location="cpu") cpt = torch.load(file, map_location="cpu")
model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf) model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)

View File

@ -4,7 +4,7 @@ import torch
from onnxsim import simplify from onnxsim import simplify
import onnx import onnx
from const import TMP_DIR, EnumInferenceTypes from const import TMP_DIR, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot from data.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import ( from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
SynthesizerTrnMs256NSFsid_ONNX, SynthesizerTrnMs256NSFsid_ONNX,
@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
modelFile = modelSlot.modelFile modelFile = modelSlot.modelFile
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx" output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
output_file_simple = ( output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
)
output_path = os.path.join(TMP_DIR, output_file) output_path = os.path.join(TMP_DIR, output_file)
output_path_simple = os.path.join(TMP_DIR, output_file_simple) output_path_simple = os.path.join(TMP_DIR, output_file_simple)
metadata = { metadata = {
@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
if gpuMomory > 0: if gpuMomory > 0:
_export2onnx(modelFile, output_path, output_path_simple, True, metadata) _export2onnx(modelFile, output_path, output_path_simple, True, metadata)
else: else:
print( print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
)
_export2onnx(modelFile, output_path, output_path_simple, False, metadata) _export2onnx(modelFile, output_path, output_path_simple, False, metadata)
return output_file_simple return output_file_simple

View File

@ -1,8 +1,8 @@
import os import os
import traceback import traceback
import faiss import faiss
from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str): def createPipeline(slotInfo: RVCSlotInfo, gpu: int, f0Detector: str):
dev = DeviceManager.get_instance().getDevice(gpu) dev = DeviceManager.get_instance().getDevice(gpu)
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu) half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
# Inferencer 生成 # Inferencer 生成
try: try:
inferencer = InferencerManager.getInferencer( inferencer = InferencerManager.getInferencer(slotInfo.modelType, slotInfo.modelFile, gpu)
modelSlot.modelType, modelSlot.modelFile, gpu
)
except Exception as e: except Exception as e:
print("[Voice Changer] exception! loading inferencer", e) print("[Voice Changer] exception! loading inferencer", e)
traceback.print_exc() traceback.print_exc()
@ -26,7 +24,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
# Embedder 生成 # Embedder 生成
try: try:
embedder = EmbedderManager.getEmbedder( embedder = EmbedderManager.getEmbedder(
modelSlot.embedder, slotInfo.embedder,
# emmbedderFilename, # emmbedderFilename,
half, half,
dev, dev,
@ -39,14 +37,14 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector) pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector)
# index, feature # index, feature
index = _loadIndex(modelSlot) index = _loadIndex(slotInfo)
pipeline = Pipeline( pipeline = Pipeline(
embedder, embedder,
inferencer, inferencer,
pitchExtractor, pitchExtractor,
index, index,
modelSlot.samplingRate, slotInfo.samplingRate,
dev, dev,
half, half,
) )
@ -54,21 +52,21 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
return pipeline return pipeline
def _loadIndex(modelSlot: ModelSlot): def _loadIndex(slotInfo: RVCSlotInfo):
# Indexのロード # Indexのロード
print("[Voice Changer] Loading index...") print("[Voice Changer] Loading index...")
# ファイル指定がない場合はNone # ファイル指定がない場合はNone
if modelSlot.indexFile is None: if slotInfo.indexFile is None:
print("[Voice Changer] Index is None, not used") print("[Voice Changer] Index is None, not used")
return None return None
# ファイル指定があってもファイルがない場合はNone # ファイル指定があってもファイルがない場合はNone
if os.path.exists(modelSlot.indexFile) is not True: if os.path.exists(slotInfo.indexFile) is not True:
return None return None
try: try:
print("Try loading...", modelSlot.indexFile) print("Try loading...", slotInfo.indexFile)
index = faiss.read_index(modelSlot.indexFile) index = faiss.read_index(slotInfo.indexFile)
except: except:
print("[Voice Changer] load index failed. Use no index.") print("[Voice Changer] load index failed. Use no index.")
traceback.print_exc() traceback.print_exc()

View File

@ -28,7 +28,7 @@ class DioPitchExtractor(PitchExtractor):
f0_floor=f0_min, f0_floor=f0_min,
f0_ceil=f0_max, f0_ceil=f0_max,
channels_in_octave=2, channels_in_octave=2,
frame_period=10, frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
) )
f0 = pyworld.stonemask(audio.astype(np.double), _f0, t, sr) f0 = pyworld.stonemask(audio.astype(np.double), _f0, t, sr)
f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame)) f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame))

View File

@ -27,7 +27,7 @@ class HarvestPitchExtractor(PitchExtractor):
audio.astype(np.double), audio.astype(np.double),
fs=sr, fs=sr,
f0_ceil=f0_max, f0_ceil=f0_max,
frame_period=10, frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
) )
f0 = pyworld.stonemask(audio.astype(np.double), f0, t, sr) f0 = pyworld.stonemask(audio.astype(np.double), f0, t, sr)
f0 = signal.medfilt(f0, 3) f0 = signal.medfilt(f0, 3)

View File

@ -1,21 +1,21 @@
from typing import Any, Union, cast from typing import Any, Union, cast
import socketio import socketio
from const import TMP_DIR, ModelType from const import TMP_DIR, VoiceChangerType
import torch import torch
import os import os
import traceback import traceback
import numpy as np import numpy as np
from dataclasses import dataclass, asdict, field from dataclasses import dataclass, asdict, field
import resampy import resampy
from data.ModelSlot import loadSlotInfo
from voice_changer.IORecorder import IORecorder from voice_changer.IORecorder import IORecorder
from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.Timer import Timer from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut from voice_changer.utils.VoiceChangerModel import AudioInOut
from Exceptions import ( from Exceptions import (
DeviceCannotSupportHalfPrecisionException, DeviceCannotSupportHalfPrecisionException,
DeviceChangingException, DeviceChangingException,
@ -26,15 +26,22 @@ from Exceptions import (
VoiceChangerIsNotSelectedException, VoiceChangerIsNotSelectedException,
) )
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
import threading
import time # import threading
import sounddevice as sd # import time
import librosa # import sounddevice as sd
# import librosa
import json
STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav") STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav") STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
@dataclass
class SlotInfo:
voiceChangerType: VoiceChangerType | None = None
@dataclass @dataclass
class VoiceChangerSettings: class VoiceChangerSettings:
inputSampleRate: int = 48000 # 48000 or 24000 inputSampleRate: int = 48000 # 48000 or 24000
@ -45,9 +52,7 @@ class VoiceChangerSettings:
recordIO: int = 0 # 0:off, 1:on recordIO: int = 0 # 0:off, 1:on
serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: []) serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
serverAudioOutputDevices: list[ServerAudioDevice] = field( serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
default_factory=lambda: []
)
enableServerAudio: int = 0 # 0:off, 1:on enableServerAudio: int = 0 # 0:off, 1:on
serverAudioStated: int = 0 # 0:off, 1:on serverAudioStated: int = 0 # 0:off, 1:on
@ -93,160 +98,131 @@ class VoiceChangerSettings:
class VoiceChanger: class VoiceChanger:
settings: VoiceChangerSettings = VoiceChangerSettings() # settings: VoiceChangerSettings = VoiceChangerSettings()
voiceChanger: VoiceChangerModel | None = None # voiceChangerModel: VoiceChangerModel | None = None
ioRecorder: IORecorder #
sola_buffer: AudioInOut #
namespace: socketio.AsyncNamespace | None = None # namespace: socketio.AsyncNamespace | None = None
localPerformanceShowTime = 0.0 # localPerformanceShowTime = 0.0
emitTo = None # emitTo = None
def audio_callback( # def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status # try:
): # indata = indata * self.settings.serverInputAudioGain
try: # with Timer("all_inference_time") as t:
indata = indata * self.settings.serverInputAudioGain # unpackedData = librosa.to_mono(indata.T) * 32768.0
with Timer("all_inference_time") as t: # out_wav, times = self.on_request(unpackedData)
unpackedData = librosa.to_mono(indata.T) * 32768.0 # outputChunnels = outdata.shape[1]
out_wav, times = self.on_request(unpackedData) # outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
outputChunnels = outdata.shape[1] # outdata[:] = outdata * self.settings.serverOutputAudioGain
outdata[:] = ( # all_inference_time = t.secs
np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) # performance = [all_inference_time] + times
/ 32768.0 # if self.emitTo is not None:
) # self.emitTo(performance)
outdata[:] = outdata * self.settings.serverOutputAudioGain # self.settings.performance = [round(x * 1000) for x in performance]
all_inference_time = t.secs # except Exception as e:
performance = [all_inference_time] + times # print("[Voice Changer] ex:", e)
if self.emitTo is not None:
self.emitTo(performance)
self.settings.performance = [round(x * 1000) for x in performance]
except Exception as e:
print("[Voice Changer] ex:", e)
def getServerAudioDevice( # def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
self, audioDeviceList: list[ServerAudioDevice], index: int # serverAudioDevice = [x for x in audioDeviceList if x.index == index]
): # if len(serverAudioDevice) > 0:
serverAudioDevice = [x for x in audioDeviceList if x.index == index] # return serverAudioDevice[0]
if len(serverAudioDevice) > 0: # else:
return serverAudioDevice[0] # return None
else:
return None
def serverLocal(self, _vc): # def serverLocal(self, _vc):
vc: VoiceChanger = _vc # vc: VoiceChanger = _vc
currentInputDeviceId = -1 # currentInputDeviceId = -1
currentModelSamplingRate = -1 # currentModelSamplingRate = -1
currentOutputDeviceId = -1 # currentOutputDeviceId = -1
currentInputChunkNum = -1 # currentInputChunkNum = -1
while True: # while True:
if ( # if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
vc.settings.serverAudioStated == 0 # vc.settings.inputSampleRate = 48000
or vc.settings.serverInputDeviceId == -1 # time.sleep(2)
or vc.voiceChanger is None # else:
): # sd._terminate()
vc.settings.inputSampleRate = 48000 # sd._initialize()
time.sleep(2)
else:
sd._terminate()
sd._initialize()
sd.default.device[0] = vc.settings.serverInputDeviceId # sd.default.device[0] = vc.settings.serverInputDeviceId
currentInputDeviceId = vc.settings.serverInputDeviceId # currentInputDeviceId = vc.settings.serverInputDeviceId
sd.default.device[1] = vc.settings.serverOutputDeviceId # sd.default.device[1] = vc.settings.serverOutputDeviceId
currentOutputDeviceId = vc.settings.serverOutputDeviceId # currentOutputDeviceId = vc.settings.serverOutputDeviceId
currentInputChannelNum = vc.settings.serverAudioInputDevices # currentInputChannelNum = vc.settings.serverAudioInputDevices
serverInputAudioDevice = self.getServerAudioDevice( # serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
vc.settings.serverAudioInputDevices, currentInputDeviceId # serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
) # print(serverInputAudioDevice, serverOutputAudioDevice)
serverOutputAudioDevice = self.getServerAudioDevice( # if serverInputAudioDevice is None or serverOutputAudioDevice is None:
vc.settings.serverAudioOutputDevices, currentOutputDeviceId # time.sleep(2)
) # print("serverInputAudioDevice or serverOutputAudioDevice is None")
print(serverInputAudioDevice, serverOutputAudioDevice) # continue
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
time.sleep(2)
print("serverInputAudioDevice or serverOutputAudioDevice is None")
continue
currentInputChannelNum = serverInputAudioDevice.maxInputChannels # currentInputChannelNum = serverInputAudioDevice.maxInputChannels
currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels # currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
currentInputChunkNum = vc.settings.serverReadChunkSize # currentInputChunkNum = vc.settings.serverReadChunkSize
block_frame = currentInputChunkNum * 128 # block_frame = currentInputChunkNum * 128
# sample rate precheck(alsa cannot use 40000?) # # sample rate precheck(alsa cannot use 40000?)
try: # try:
currentModelSamplingRate = ( # currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
self.voiceChanger.get_processing_sampling_rate() # except Exception as e:
) # print("[Voice Changer] ex: get_processing_sampling_rate", e)
except Exception as e: # continue
print("[Voice Changer] ex: get_processing_sampling_rate", e) # try:
continue # with sd.Stream(
try: # callback=self.audio_callback,
with sd.Stream( # blocksize=block_frame,
callback=self.audio_callback, # samplerate=currentModelSamplingRate,
blocksize=block_frame, # dtype="float32",
samplerate=currentModelSamplingRate, # channels=[currentInputChannelNum, currentOutputChannelNum],
dtype="float32", # ):
channels=[currentInputChannelNum, currentOutputChannelNum], # pass
): # vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
pass # vc.settings.inputSampleRate = currentModelSamplingRate
vc.settings.serverInputAudioSampleRate = currentModelSamplingRate # print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
vc.settings.inputSampleRate = currentModelSamplingRate # except Exception as e:
print( # print(
f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}" # "[Voice Changer] ex: fallback to device default samplerate",
) # e,
except Exception as e: # )
print( # vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
"[Voice Changer] ex: fallback to device default samplerate", # vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
e,
)
vc.settings.serverInputAudioSampleRate = (
serverInputAudioDevice.default_samplerate
)
vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
# main loop # # main loop
try: # try:
with sd.Stream( # with sd.Stream(
callback=self.audio_callback, # callback=self.audio_callback,
blocksize=block_frame, # blocksize=block_frame,
samplerate=vc.settings.serverInputAudioSampleRate, # samplerate=vc.settings.serverInputAudioSampleRate,
dtype="float32", # dtype="float32",
channels=[currentInputChannelNum, currentOutputChannelNum], # channels=[currentInputChannelNum, currentOutputChannelNum],
): # ):
while ( # while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
vc.settings.serverAudioStated == 1 # time.sleep(2)
and currentInputDeviceId == vc.settings.serverInputDeviceId # print(
and currentOutputDeviceId # "[Voice Changer] server audio",
== vc.settings.serverOutputDeviceId # self.settings.performance,
and currentModelSamplingRate # )
== self.voiceChanger.get_processing_sampling_rate() # print(
and currentInputChunkNum == vc.settings.serverReadChunkSize # "[Voice Changer] info:",
): # vc.settings.serverAudioStated,
time.sleep(2) # currentInputDeviceId,
print( # currentOutputDeviceId,
"[Voice Changer] server audio", # vc.settings.serverInputAudioSampleRate,
self.settings.performance, # currentInputChunkNum,
) # )
print(
"[Voice Changer] info:",
vc.settings.serverAudioStated,
currentInputDeviceId,
currentOutputDeviceId,
vc.settings.serverInputAudioSampleRate,
currentInputChunkNum,
)
except Exception as e: # except Exception as e:
print("[Voice Changer] ex:", e) # print("[Voice Changer] ex:", e)
time.sleep(2) # time.sleep(2)
def __init__(self, params: VoiceChangerParams): def __init__(self, params: VoiceChangerParams, slotIndex: int):
# 初期化 # 初期化
self.settings = VoiceChangerSettings() self.settings = VoiceChangerSettings()
self.onnx_session = None self.onnx_session = None
@ -255,147 +231,80 @@ class VoiceChanger:
self.currentCrossFadeOverlapSize = 0 # setting self.currentCrossFadeOverlapSize = 0 # setting
self.crossfadeSize = 0 # calculated self.crossfadeSize = 0 # calculated
self.voiceChanger = None self.voiceChangerModel = None
self.modelType: ModelType | None = None self.modelType: VoiceChangerType | None = None
self.params = params self.params = params
self.gpu_num = torch.cuda.device_count()
self.prev_audio = np.zeros(4096) self.prev_audio = np.zeros(4096)
self.mps_enabled: bool = ( self.ioRecorder: IORecorder | None = None
getattr(torch.backends, "mps", None) is not None self.sola_buffer: AudioInOut | None = None
and torch.backends.mps.is_available()
)
audioinput, audiooutput = list_audio_device() audioinput, audiooutput = list_audio_device()
self.settings.serverAudioInputDevices = audioinput self.settings.serverAudioInputDevices = audioinput
self.settings.serverAudioOutputDevices = audiooutput self.settings.serverAudioOutputDevices = audiooutput
thread = threading.Thread(target=self.serverLocal, args=(self,)) self.slotIndex = slotIndex
thread.start() self.slotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
print( if self.slotInfo.voiceChangerType is None:
f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})" print(f"[Voice Changer] Voice Changer Type is None for slot {slotIndex} is not found.")
) return
elif self.slotInfo.voiceChangerType == "RVC":
from voice_changer.RVC.RVC import RVC
def switchModelType(self, modelType: ModelType): self.voiceChangerModel = RVC(self.slotIndex, self.params)
try:
if self.voiceChanger is not None:
# return {"status": "ERROR", "msg": "vc is already selected. currently re-select is not implemented"}
del self.voiceChanger
self.voiceChanger = None
self.modelType = modelType
if self.modelType == "MMVCv15":
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
self.voiceChanger = MMVCv15() # type: ignore
elif self.modelType == "MMVCv13":
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
self.voiceChanger = MMVCv13()
elif self.modelType == "so-vits-svc-40v2":
from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
self.voiceChanger = SoVitsSvc40v2(self.params)
elif (
self.modelType == "so-vits-svc-40"
or self.modelType == "so-vits-svc-40_c"
):
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
self.voiceChanger = SoVitsSvc40(self.params)
elif self.modelType == "DDSP-SVC":
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
self.voiceChanger = DDSP_SVC(self.params)
elif self.modelType == "RVC":
from voice_changer.RVC.RVC import RVC
self.voiceChanger = RVC(self.params)
else:
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
self.voiceChanger = MMVCv13()
except Exception as e:
print(e)
print(traceback.format_exc())
return {"status": "OK", "msg": "vc is switched."}
def getModelType(self):
if self.modelType is not None:
return {"status": "OK", "vc": self.modelType}
else: else:
return {"status": "OK", "vc": "none"} print(f"[Voice Changer] unknwon voice changer type. {self.slotInfo.voiceChangerType}")
def loadModel(self, props: LoadModelParams): # thread = threading.Thread(target=self.serverLocal, args=(self,))
try: # thread.start()
if self.voiceChanger is None:
raise VoiceChangerIsNotSelectedException( def prepareModel(self):
"Voice Changer is not selected." self.voiceChangerModel.prepareModel()
)
return self.voiceChanger.loadModel(props)
except Exception as e:
print(traceback.format_exc())
print("[Voice Changer] Model Load Error! Check your model is valid.", e)
return {"status": "NG"}
def get_info(self): def get_info(self):
data = asdict(self.settings) data = asdict(self.settings)
if self.voiceChanger is not None: if self.voiceChangerModel is not None:
data.update(self.voiceChanger.get_info()) data.update(self.voiceChangerModel.get_info())
devCount = torch.cuda.device_count()
gpus = []
for id in range(devCount):
name = torch.cuda.get_device_name(id)
memory = torch.cuda.get_device_properties(id).total_memory
gpu = {"id": id, "name": name, "memory": memory}
gpus.append(gpu)
data["gpus"] = gpus
return data return data
def get_performance(self): def get_performance(self):
return self.settings.performance return self.settings.performance
def update_settings(self, key: str, val: Any): def update_settings(self, key: str, val: Any):
if self.voiceChanger is None: if self.voiceChangerModel is None:
print("[Voice Changer] Voice Changer is not selected.") print("[Voice Changer] Voice Changer is not selected.")
return self.get_info() return
if key in self.settings.intData: if key in self.settings.intData:
setattr(self.settings, key, int(val)) setattr(self.settings, key, int(val))
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate": if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
self.crossfadeSize = 0 self.crossfadeSize = 0
if key == "recordIO" and val == 1: if key == "recordIO" and val == 1:
if hasattr(self, "ioRecorder"): if self.ioRecorder is not None:
self.ioRecorder.close() self.ioRecorder.close()
self.ioRecorder = IORecorder( self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
)
if key == "recordIO" and val == 0: if key == "recordIO" and val == 0:
if hasattr(self, "ioRecorder"): if self.ioRecorder is not None:
self.ioRecorder.close() self.ioRecorder.close()
self.ioRecorder = None
pass pass
if key == "recordIO" and val == 2: if key == "recordIO" and val == 2:
if hasattr(self, "ioRecorder"): if self.ioRecorder is not None:
self.ioRecorder.close() self.ioRecorder.close()
self.ioRecorder = None
elif key in self.settings.floatData: elif key in self.settings.floatData:
setattr(self.settings, key, float(val)) setattr(self.settings, key, float(val))
elif key in self.settings.strData: elif key in self.settings.strData:
setattr(self.settings, key, str(val)) setattr(self.settings, key, str(val))
else: else:
ret = self.voiceChanger.update_settings(key, val) ret = self.voiceChangerModel.update_settings(key, val)
if ret is False: if ret is False:
pass pass
# print(f"({key} is not mutable variable or unknown variable)") # print(f"({key} is not mutable variable or unknown variable)")
return self.get_info()
def _generate_strength(self, crossfadeSize: int): def _generate_strength(self, crossfadeSize: int):
if ( if self.crossfadeSize != crossfadeSize or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
self.crossfadeSize != crossfadeSize
or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
):
self.crossfadeSize = crossfadeSize self.crossfadeSize = crossfadeSize
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
@ -424,32 +333,25 @@ class VoiceChanger:
] ]
) )
print( print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")
f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
)
# ひとつ前の結果とサイズが変わるため、記録は消去する。 # ひとつ前の結果とサイズが変わるため、記録は消去する。
if hasattr(self, "np_prev_audio1") is True: if hasattr(self, "np_prev_audio1") is True:
delattr(self, "np_prev_audio1") delattr(self, "np_prev_audio1")
if hasattr(self, "sola_buffer") is True: if self.sola_buffer is not None:
del self.sola_buffer del self.sola_buffer
self.sola_buffer = None
# receivedData: tuple of short # receivedData: tuple of short
def on_request( def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
self, receivedData: AudioInOut
) -> tuple[AudioInOut, list[Union[int, float]]]:
return self.on_request_sola(receivedData) return self.on_request_sola(receivedData)
def on_request_sola( def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
self, receivedData: AudioInOut
) -> tuple[AudioInOut, list[Union[int, float]]]:
try: try:
if self.voiceChanger is None: if self.voiceChangerModel is None:
raise VoiceChangerIsNotSelectedException( raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
"Voice Changer is not selected."
)
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate() processing_sampling_rate = self.voiceChangerModel.get_processing_sampling_rate()
# 前処理 # 前処理
with Timer("pre-process") as t: with Timer("pre-process") as t:
if self.settings.inputSampleRate != processing_sampling_rate: if self.settings.inputSampleRate != processing_sampling_rate:
@ -470,21 +372,17 @@ class VoiceChanger:
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame) crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
self._generate_strength(crossfade_frame) self._generate_strength(crossfade_frame)
data = self.voiceChanger.generate_input( data = self.voiceChangerModel.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
newData, block_frame, crossfade_frame, sola_search_frame
)
preprocess_time = t.secs preprocess_time = t.secs
# 変換処理 # 変換処理
with Timer("main-process") as t: with Timer("main-process") as t:
# Inference # Inference
audio = self.voiceChanger.inference(data) audio = self.voiceChangerModel.inference(data)
if hasattr(self, "sola_buffer") is True: if self.sola_buffer is not None:
np.set_printoptions(threshold=10000) np.set_printoptions(threshold=10000)
audio_offset = -1 * ( audio_offset = -1 * (sola_search_frame + crossfade_frame + block_frame)
sola_search_frame + crossfade_frame + block_frame
)
audio = audio[audio_offset:] audio = audio[audio_offset:]
# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
@ -512,10 +410,7 @@ class VoiceChanger:
print("[Voice Changer] warming up... generating sola buffer.") print("[Voice Changer] warming up... generating sola buffer.")
result = np.zeros(4096).astype(np.int16) result = np.zeros(4096).astype(np.int16)
if ( if self.sola_buffer is not None and sola_offset < sola_search_frame:
hasattr(self, "sola_buffer") is True
and sola_offset < sola_search_frame
):
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset) offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
end = -1 * (sola_search_frame - sola_offset) end = -1 * (sola_search_frame - sola_offset)
sola_buf_org = audio[offset:end] sola_buf_org = audio[offset:end]
@ -545,9 +440,7 @@ class VoiceChanger:
else: else:
outputData = result outputData = result
print_convert_processing( print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
)
if receivedData.shape[0] != outputData.shape[0]: if receivedData.shape[0] != outputData.shape[0]:
# print( # print(
@ -564,9 +457,7 @@ class VoiceChanger:
postprocess_time = t.secs postprocess_time = t.secs
print_convert_processing( print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
)
perf = [preprocess_time, mainprocess_time, postprocess_time] perf = [preprocess_time, mainprocess_time, postprocess_time]
return outputData, perf return outputData, perf
@ -586,9 +477,7 @@ class VoiceChanger:
print("[Voice Changer] embedder:", e) print("[Voice Changer] embedder:", e)
return np.zeros(1).astype(np.int16), [0, 0, 0] return np.zeros(1).astype(np.int16), [0, 0, 0]
except VoiceChangerIsNotSelectedException: except VoiceChangerIsNotSelectedException:
print( print("[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc.")
"[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc."
)
return np.zeros(1).astype(np.int16), [0, 0, 0] return np.zeros(1).astype(np.int16), [0, 0, 0]
except DeviceCannotSupportHalfPrecisionException: except DeviceCannotSupportHalfPrecisionException:
# RVC.pyでfallback処理をするので、ここはダミーデータ返すだけ。 # RVC.pyでfallback処理をするので、ここはダミーデータ返すだけ。

View File

@ -1,35 +1,105 @@
import numpy as np import numpy as np
import threading
from data.ModelSample import ModelSamples
from data.ModelSlot import ModelSlots, loadSlotInfo
from utils.downloader.SampleDownloader import downloadSample, getSampleInfos
from voice_changer.Local.ServerDevice import ServerDevice
from voice_changer.RVC.ModelSlotGenerator import setSlotAsRVC
from voice_changer.VoiceChanger import VoiceChanger from voice_changer.VoiceChanger import VoiceChanger
from const import ModelType from const import MAX_SLOT_NUM, VoiceChangerType
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.VoiceChangerModel import AudioInOut from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from dataclasses import dataclass, asdict, field
import torch
import json
@dataclass()
class GPUInfo:
id: int
name: str
memory: int
@dataclass()
class VoiceChangerManagerSettings:
slotIndex: int
intData: list[str] = field(default_factory=lambda: ["slotIndex"])
class VoiceChangerManager(object): class VoiceChangerManager(object):
_instance = None _instance = None
voiceChanger: VoiceChanger = None
def __init__(self, params: VoiceChangerParams):
self.voiceChanger: VoiceChanger = None
self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings(slotIndex=0)
self.params: VoiceChangerParams = params
self.serverDevice = ServerDevice()
# スタティックな情報を収集
self.sampleModels: list[ModelSamples] = getSampleInfos(self.params.sample_mode)
self.gpus: list[GPUInfo] = self._get_gpuInfos()
def _get_gpuInfos(self):
devCount = torch.cuda.device_count()
gpus = []
for id in range(devCount):
name = torch.cuda.get_device_name(id)
memory = torch.cuda.get_device_properties(id).total_memory
gpu = {"id": id, "name": name, "memory": memory}
gpus.append(gpu)
return gpus
@classmethod @classmethod
def get_instance(cls, params: VoiceChangerParams): def get_instance(cls, params: VoiceChangerParams):
if cls._instance is None: if cls._instance is None:
cls._instance = cls() cls._instance = cls(params)
cls._instance.voiceChanger = VoiceChanger(params)
gpu_num = torch.cuda.device_count()
mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
print(f"VoiceChanger Initialized (GPU_NUM:{gpu_num}, mps_enabled:{mps_enabled})")
cls._instance.voiceChanger = VoiceChanger(params, cls._instance.settings.slotIndex)
thread = threading.Thread(target=cls._instance.serverDevice.serverLocal, args=(cls._instance.voiceChanger,))
thread.start()
cls._instance.voiceChanger.prepareModel()
return cls._instance return cls._instance
def loadModel(self, props: LoadModelParams): def loadModel(self, slot: int, voiceChangerType: VoiceChangerType, params: str):
info = self.voiceChanger.loadModel(props) print(slot, voiceChangerType, params)
if hasattr(info, "status") and info["status"] == "NG": paramDict = json.loads(params)
return info if voiceChangerType == "RVC":
else: if "sampleId" in paramDict and len(paramDict["sampleId"]) > 0:
info["status"] = "OK" print("[Voice Canger]: Download RVC sample.")
return info downloadSample(self.params.sample_mode, paramDict["sampleId"], self.params.model_dir, slot, {"useIndex": paramDict["rvcIndexDownload"]})
else:
print("[Voice Canger]: Set uploaded RVC model to slot.")
setSlotAsRVC(self.params.model_dir, slot, paramDict)
return self.get_info()
def get_slotInfos(self):
slotInfos: list[ModelSlots] = []
for slotIndex in range(MAX_SLOT_NUM):
slotInfo = loadSlotInfo(self.params.model_dir, slotIndex)
slotInfos.append(slotInfo)
return slotInfos
def get_info(self): def get_info(self):
data = asdict(self.settings)
slotInfos = self.get_slotInfos()
data["slotInfos"] = slotInfos
data["gpus"] = self.gpus
data["sampleModels"] = self.sampleModels
data["status"] = "OK"
if hasattr(self, "voiceChanger"): if hasattr(self, "voiceChanger"):
info = self.voiceChanger.get_info() info = self.voiceChanger.get_info()
info["status"] = "OK" data.update(info)
return info return data
else: else:
return {"status": "ERROR", "msg": "no model loaded"} return {"status": "ERROR", "msg": "no model loaded"}
@ -41,12 +111,22 @@ class VoiceChangerManager(object):
return {"status": "ERROR", "msg": "no model loaded"} return {"status": "ERROR", "msg": "no model loaded"}
def update_settings(self, key: str, val: str | int | float): def update_settings(self, key: str, val: str | int | float):
if hasattr(self, "voiceChanger"): if key in self.settings.intData:
info = self.voiceChanger.update_settings(key, val) setattr(self.settings, key, int(val))
info["status"] = "OK" if key == "slotIndex":
return info val = val % 1000 # Quick hack for same slot is selected
setattr(self.settings, key, int(val))
newVoiceChanger = VoiceChanger(self.params, self.settings.slotIndex)
newVoiceChanger.prepareModel()
self.serverDevice.serverLocal(newVoiceChanger)
del self.voiceChanger
self.voiceChanger = newVoiceChanger
elif hasattr(self, "voiceChanger"):
self.voiceChanger.update_settings(key, val)
else: else:
return {"status": "ERROR", "msg": "no model loaded"} print(f"[Voice Changer] update is not handled. ({key}:{val})")
return self.get_info()
def changeVoice(self, receivedData: AudioInOut): def changeVoice(self, receivedData: AudioInOut):
if hasattr(self, "voiceChanger") is True: if hasattr(self, "voiceChanger") is True:
@ -55,12 +135,6 @@ class VoiceChangerManager(object):
print("Voice Change is not loaded. Did you load a correct model?") print("Voice Change is not loaded. Did you load a correct model?")
return np.zeros(1).astype(np.int16), [] return np.zeros(1).astype(np.int16), []
def switchModelType(self, modelType: ModelType):
return self.voiceChanger.switchModelType(modelType)
def getModelType(self):
return self.voiceChanger.getModelType()
def export2onnx(self): def export2onnx(self):
return self.voiceChanger.export2onnx() return self.voiceChanger.export2onnx()

View File

@ -10,10 +10,3 @@ class FilePaths:
clusterTorchModelFilename: str | None clusterTorchModelFilename: str | None
featureFilename: str | None featureFilename: str | None
indexFilename: str | None indexFilename: str | None
@dataclass
class LoadModelParams:
slot: int
isHalf: bool
params: Any

View File

@ -1,17 +1,11 @@
from typing import Any, Protocol, TypeAlias from typing import Any, Protocol, TypeAlias
import numpy as np import numpy as np
from voice_changer.utils.LoadModelParams import LoadModelParams
AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]] AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
class VoiceChangerModel(Protocol): class VoiceChangerModel(Protocol):
# loadModel: Callable[..., dict[str, Any]]
def loadModel(self, params: LoadModelParams):
...
def get_processing_sampling_rate(self) -> int: def get_processing_sampling_rate(self) -> int:
... ...
@ -21,9 +15,7 @@ class VoiceChangerModel(Protocol):
def inference(self, data: tuple[Any, ...]) -> Any: def inference(self, data: tuple[Any, ...]) -> Any:
... ...
def generate_input( def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]:
self, newData: AudioInOut, inputSize: int, crossfadeSize: int
) -> tuple[Any, ...]:
... ...
def update_settings(self, key: str, val: Any) -> bool: def update_settings(self, key: str, val: Any) -> bool:

View File

@ -11,4 +11,4 @@ class VoiceChangerParams:
hubert_base_jp: str hubert_base_jp: str
hubert_soft: str hubert_soft: str
nsf_hifigan: str nsf_hifigan: str
rvc_sample_mode: str sample_mode: str