WIP: refactoring

This commit is contained in:
wataru 2023-06-16 00:56:18 +09:00
parent 328ea46161
commit be42bb682d
53 changed files with 3257 additions and 1687 deletions

3
.gitignore vendored
View File

@ -43,12 +43,15 @@ docker/cudnn/
server/pretrain/
server/weights/
server/model_dir/
server/model_dir2/
server/weights_/
server/weights__/
server/models/
server/samples.json
server/samples_0003_t.json
server/samples_0003_o.json
server/samples_0003_o2.json
server/samples_0003_t2.json
server/test_official_v1_v2.json
server/test_ddpn_v1_v2.json

View File

@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

View File

@ -27,7 +27,6 @@ export const useMessageBuilder_old = (): MessageBuilderStateAndMethod => {
lang = "en"
}
console.log(messagesRef.current)
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
}
return {

View File

@ -39,7 +39,7 @@ export const ModelSlotManagerDialog = () => {
if (mode != "localFile") {
return <></>
}
if (!serverSetting.serverSetting.modelSlots) {
if (!serverSetting.serverSetting.slotInfos) {
return <></>
}
@ -114,7 +114,7 @@ export const ModelSlotManagerDialog = () => {
}
const slots = serverSetting.serverSetting.modelSlots.map((x, index) => {
const slots = serverSetting.serverSetting.slotInfos.map((x, index) => {
let modelFileName = ""
if (uploadData?.slot == index) {
modelFileName = (uploadData.model?.name || "").replace(/^.*[\\\/]/, '')
@ -248,7 +248,7 @@ export const ModelSlotManagerDialog = () => {
}, [
mode,
serverSetting.serverSetting.modelSlots,
serverSetting.serverSetting.slotInfos,
serverSetting.fileUploadSettings,
serverSetting.uploadProgress,
serverSetting.setFileUploadSetting,

View File

@ -13,19 +13,22 @@ export const MergeLabDialog = () => {
// スロットが変更されたときの初期化処理
const newSlotChangeKey = useMemo(() => {
if (!serverSetting.serverSetting.modelSlots) {
if (!serverSetting.serverSetting.slotInfos) {
return ""
}
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => {
return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
return prev + "_" + cur.modelFile
}, "")
}, [serverSetting.serverSetting.modelSlots])
}, [serverSetting.serverSetting.slotInfos])
const filterItems = useMemo(() => {
return serverSetting.serverSetting.modelSlots.reduce((prev, cur) => {
return serverSetting.serverSetting.slotInfos.reduce((prev, cur) => {
const key = `${cur.modelType},${cur.samplingRate},${cur.embChannels}`
const val = { type: cur.modelType, samplingRate: cur.samplingRate, embChannels: cur.embChannels }
const existKeys = Object.keys(prev)
if (cur.voiceChangerType == null) {
return prev
}
if (cur.modelFile.length == 0) {
return prev
}
@ -41,7 +44,7 @@ export const MergeLabDialog = () => {
}, [newSlotChangeKey])
const models = useMemo(() => {
return serverSetting.serverSetting.modelSlots.filter(x => {
return serverSetting.serverSetting.slotInfos.filter(x => {
const filterVals = filterItems[currentFilter]
if (!filterVals) {
return false

View File

@ -9,11 +9,11 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
const appState = useAppState()
const modelSwitchRow = useMemo(() => {
const slot = appState.serverSetting.serverSetting.modelSlotIndex
const slot = appState.serverSetting.serverSetting.slotIndex
const onSwitchModelClicked = async (slot: number) => {
// Quick hack for same slot is selected. 下桁が実際のSlotID
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + slot
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex })
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
setTimeout(() => { // quick hack
appState.getInfo()
}, 1000 * 2)
@ -24,7 +24,7 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
}
const modelSlots = appState.serverSetting.serverSetting.modelSlots
const modelSlots = appState.serverSetting.serverSetting.slotInfos
let options: React.JSX.Element[] = []
if (modelSlots) {
options = modelSlots.map((x, index) => {

View File

@ -18,14 +18,17 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
const modelTiles = useMemo(() => {
if (!serverSetting.serverSetting.modelSlots) {
if (!serverSetting.serverSetting.slotInfos) {
return []
}
return serverSetting.serverSetting.modelSlots.map((x, index) => {
return serverSetting.serverSetting.slotInfos.map((x, index) => {
if (x.voiceChangerType == null) {
return null
}
if (x.modelFile.length == 0) {
return null
}
const tileContainerClass = index == serverSetting.serverSetting.modelSlotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container"
const tileContainerClass = index == serverSetting.serverSetting.slotIndex ? "model-slot-tile-container-selected" : "model-slot-tile-container"
const name = x.name.length > 8 ? x.name.substring(0, 7) + "..." : x.name
const iconElem = x.iconFile.length > 0 ?
<img className="model-slot-tile-icon" src={x.iconFile} alt={x.name} /> :
@ -33,7 +36,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
const clickAction = async () => {
const dummyModelSlotIndex = (Math.floor(Date.now() / 1000)) * 1000 + index
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, modelSlotIndex: dummyModelSlotIndex })
await serverSetting.updateServerSettings({ ...serverSetting.serverSetting, slotIndex: dummyModelSlotIndex })
setTimeout(() => { // quick hack
getInfo()
}, 1000 * 2)
@ -50,7 +53,7 @@ export const ModelSlotArea = (_props: ModelSlotAreaProps) => {
</div >
)
}).filter(x => x != null)
}, [serverSetting.serverSetting.modelSlots, serverSetting.serverSetting.modelSlotIndex])
}, [serverSetting.serverSetting.slotInfos, serverSetting.serverSetting.slotIndex])
const modelSlotArea = useMemo(() => {

View File

@ -21,11 +21,12 @@ export const CharacterArea = (_props: CharacterAreaProps) => {
}, [])
const selected = useMemo(() => {
if (serverSetting.serverSetting.modelSlotIndex == undefined) {
console.log("serverSetting.serverSetting.slotInfos::", serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos)
if (serverSetting.serverSetting.slotIndex == undefined) {
return
}
return serverSetting.serverSetting.modelSlots[serverSetting.serverSetting.modelSlotIndex]
}, [serverSetting.serverSetting.modelSlotIndex, serverSetting.serverSetting.modelSlots])
return serverSetting.serverSetting.slotInfos[serverSetting.serverSetting.slotIndex]
}, [serverSetting.serverSetting.slotIndex, serverSetting.serverSetting.slotInfos])
useEffect(() => {

View File

@ -27,7 +27,6 @@ export const useMessageBuilder = (): MessageBuilderStateAndMethod => {
lang = "en"
}
console.log(messagesRef.current)
return messagesRef.current.find(x => { return x.file == file && x.id == id })?.message[lang] || "unknwon message"
}
return {

View File

@ -1,4 +1,4 @@
import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey } from "./const";
import { ClientType, MergeModelRequest, OnnxExporterInfo, ServerInfo, ServerSettingKey, VoiceChangerType } from "./const";
type FileChunk = {
@ -166,16 +166,12 @@ export class ServerConfigurator {
})
}
loadModel = async (slot: number, isHalf: boolean, params: string = "{}") => {
if (isHalf == undefined || isHalf == null) {
console.warn("isHalf is invalid value", isHalf)
isHalf = false
}
loadModel = async (slot: number, voiceChangerType: VoiceChangerType, params: string = "{}") => {
const url = this.serverUrl + "/load_model"
const info = new Promise<ServerInfo>(async (resolve) => {
const formData = new FormData();
formData.append("slot", "" + slot);
formData.append("isHalf", "" + isHalf);
formData.append("voiceChangerType", voiceChangerType);
formData.append("params", params);
const request = new Request(url, {

View File

@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream, validateUrl } from "./util";
import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, WorkletNodeSetting, WorkletSetting } from "./const";
import { ClientType, DefaultVoiceChangerClientSetting, MergeModelRequest, ServerSettingKey, VoiceChangerClientSetting, VoiceChangerType, WorkletNodeSetting, WorkletSetting } from "./const";
import { ServerConfigurator } from "./ServerConfigurator";
// オーディオデータの流れ
@ -298,10 +298,10 @@ export class VoiceChangerClient {
}
loadModel = (
slot: number,
isHalf: boolean,
voiceChangerType: VoiceChangerType,
params: string,
) => {
return this.configurator.loadModel(slot, isHalf, params)
return this.configurator.loadModel(slot, voiceChangerType, params)
}
uploadAssets = (params: string) => {
return this.configurator.uploadAssets(params)

View File

@ -16,6 +16,16 @@ export const ClientType = {
} as const
export type ClientType = typeof ClientType[keyof typeof ClientType]
export const VoiceChangerType = {
"MMVCv15": "MMVCv15",
"MMVCv13": "MMVCv13",
"so-vits-svc-40": "so-vits-svc-40",
"DDSP-SVC": "DDSP-SVC",
"RVC": "RVC"
} as const
export type VoiceChangerType = typeof VoiceChangerType[keyof typeof VoiceChangerType]
///////////////////////
// サーバセッティング
///////////////////////
@ -122,7 +132,8 @@ export const ServerSettingKey = {
"rvcQuality": "rvcQuality",
"modelSamplingRate": "modelSamplingRate",
"silenceFront": "silenceFront",
"modelSlotIndex": "modelSlotIndex",
// "modelSlotIndex": "modelSlotIndex",
"slotIndex": "slotIndex",
"useEnhancer": "useEnhancer",
"useDiff": "useDiff",
@ -181,7 +192,8 @@ export type VoiceChangerServerSetting = {
rvcQuality: number // 0:low, 1:high
silenceFront: number // 0:off, 1:on
modelSamplingRate: ModelSamplingRate // 32000,40000,48000
modelSlotIndex: number,
// modelSlotIndex: number,
slotIndex: number,
useEnhancer: number// DDSP-SVC
useDiff: number// DDSP-SVC
@ -197,7 +209,31 @@ export type VoiceChangerServerSetting = {
enableDirectML: number
}
type ModelSlot = {
// type ModelSlot = {
// modelFile: string
// featureFile: string,
// indexFile: string,
// defaultTune: number,
// defaultIndexRatio: number,
// defaultProtect: number,
// modelType: ModelType,
// embChannels: number,
// f0: boolean,
// samplingRate: number
// deprecated: boolean
// name: string,
// description: string,
// credit: string,
// termsOfUseUrl: string,
// iconFile: string
// }
type SlotInfo = {
voiceChangerType: VoiceChangerType | null
modelFile: string
featureFile: string,
indexFile: string,
@ -233,7 +269,8 @@ export type ServerInfo = VoiceChangerServerSetting & {
pyTorchModelFile: string,
onnxModelFile: string,
onnxExecutionProviders: OnnxExecutionProvider[]
modelSlots: ModelSlot[]
// modelSlots: ModelSlot[]
slotInfos: SlotInfo[]
serverAudioInputDevices: ServerAudioDevice[]
serverAudioOutputDevices: ServerAudioDevice[]
sampleModels: RVCSampleModel[]
@ -311,7 +348,8 @@ export const DefaultServerSetting: ServerInfo = {
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 1,
modelSlotIndex: 0,
// modelSlotIndex: 0,
slotIndex: 0,
sampleModels: [],
gpus: [],
@ -331,7 +369,8 @@ export const DefaultServerSetting: ServerInfo = {
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: [],
modelSlots: [],
// modelSlots: [],
slotInfos: [],
serverAudioInputDevices: [],
serverAudioOutputDevices: []
}

View File

@ -1,5 +1,5 @@
import { useState, useMemo, useEffect } from "react"
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, Framework, MergeModelRequest } from "../const"
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC, MAX_MODEL_SLOT_NUM, MergeModelRequest, VoiceChangerType } from "../const"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB"
import { ModelLoadException } from "../exceptions"
@ -18,12 +18,7 @@ export type ModelAssetName = typeof ModelAssetName[keyof typeof ModelAssetName]
export type FileUploadSetting = {
isHalf: boolean
uploaded: boolean
defaultTune: number
defaultIndexRatio: number
defaultProtect: number
framework: Framework
params: string
mmvcv13Config: ModelData | null
@ -52,12 +47,7 @@ export type FileUploadSetting = {
}
export const InitialFileUploadSetting: FileUploadSetting = {
isHalf: true,
uploaded: false,
defaultTune: 0,
defaultIndexRatio: 1,
defaultProtect: 0.5,
framework: Framework.PyTorch,
params: "{}",
mmvcv13Config: null,
@ -400,43 +390,39 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
}
// const configFileName = fileUploadSetting.configFile?.filename || "-"
const files: { kind: string, name: string }[] = []
if (fileUploadSetting.mmvcv13Config?.filename) files.push({ kind: "mmvcv13Config", name: fileUploadSetting.mmvcv13Config.filename })
if (fileUploadSetting.mmvcv13Model?.filename) files.push({ kind: "mmvcv13Model", name: fileUploadSetting.mmvcv13Model.filename })
if (fileUploadSetting.mmvcv15Config?.filename) files.push({ kind: "mmvcv15Config", name: fileUploadSetting.mmvcv15Config.filename })
if (fileUploadSetting.mmvcv15Model?.filename) files.push({ kind: "mmvcv15Model", name: fileUploadSetting.mmvcv15Model.filename })
if (fileUploadSetting.soVitsSvc40Config?.filename) files.push({ kind: "soVitsSvc40Config", name: fileUploadSetting.soVitsSvc40Config.filename })
if (fileUploadSetting.soVitsSvc40Model?.filename) files.push({ kind: "soVitsSvc40Model", name: fileUploadSetting.soVitsSvc40Model.filename })
if (fileUploadSetting.soVitsSvc40Cluster?.filename) files.push({ kind: "soVitsSvc40Cluster", name: fileUploadSetting.soVitsSvc40Cluster.filename })
if (fileUploadSetting.rvcModel?.filename) files.push({ kind: "rvcModel", name: fileUploadSetting.rvcModel.filename })
if (fileUploadSetting.rvcIndex?.filename) files.push({ kind: "rvcIndex", name: fileUploadSetting.rvcIndex.filename })
if (fileUploadSetting.ddspSvcModel?.filename) files.push({ kind: "ddspSvcModel", name: fileUploadSetting.ddspSvcModel.filename })
if (fileUploadSetting.ddspSvcModelConfig?.filename) files.push({ kind: "ddspSvcModelConfig", name: fileUploadSetting.ddspSvcModelConfig.filename })
if (fileUploadSetting.ddspSvcDiffusion?.filename) files.push({ kind: "ddspSvcDiffusion", name: fileUploadSetting.ddspSvcDiffusion.filename })
if (fileUploadSetting.ddspSvcDiffusionConfig?.filename) files.push({ kind: "ddspSvcDiffusionConfig", name: fileUploadSetting.ddspSvcDiffusionConfig.filename })
const params = JSON.stringify({
defaultTune: fileUploadSetting.defaultTune || 0,
defaultIndexRatio: fileUploadSetting.defaultIndexRatio || 1,
defaultProtect: fileUploadSetting.defaultProtect || 0.5,
sampleId: fileUploadSetting.isSampleMode ? fileUploadSetting.sampleId || "" : "",
rvcIndexDownload: fileUploadSetting.rvcIndexDownload || false,
files: fileUploadSetting.isSampleMode ? {} : {
mmvcv13Config: fileUploadSetting.mmvcv13Config?.filename || "",
mmvcv13Model: fileUploadSetting.mmvcv13Model?.filename || "",
mmvcv15Config: fileUploadSetting.mmvcv15Config?.filename || "",
mmvcv15Model: fileUploadSetting.mmvcv15Model?.filename || "",
soVitsSvc40Config: fileUploadSetting.soVitsSvc40Config?.filename || "",
soVitsSvc40Model: fileUploadSetting.soVitsSvc40Model?.filename || "",
soVitsSvc40Cluster: fileUploadSetting.soVitsSvc40Cluster?.filename || "",
soVitsSvc40v2Config: fileUploadSetting.soVitsSvc40v2Config?.filename || "",
soVitsSvc40v2Model: fileUploadSetting.soVitsSvc40v2Model?.filename || "",
soVitsSvc40v2Cluster: fileUploadSetting.soVitsSvc40v2Cluster?.filename || "",
rvcModel: fileUploadSetting.rvcModel?.filename || "",
rvcIndex: fileUploadSetting.rvcIndex?.filename || "",
rvcFeature: fileUploadSetting.rvcFeature?.filename || "",
ddspSvcModel: fileUploadSetting.ddspSvcModel?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModel?.filename : "",
ddspSvcModelConfig: fileUploadSetting.ddspSvcModelConfig?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModelConfig?.filename : "",
ddspSvcDiffusion: fileUploadSetting.ddspSvcDiffusion?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusion?.filename : "",
ddspSvcDiffusionConfig: fileUploadSetting.ddspSvcDiffusionConfig?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusionConfig.filename : "",
}
files: fileUploadSetting.isSampleMode ? [] : files
})
if (fileUploadSetting.isHalf == undefined) {
fileUploadSetting.isHalf = false
}
console.log("PARAMS:", params)
const voiceChangerType = VoiceChangerType.RVC
const loadPromise = props.voiceChangerClient.loadModel(
slot,
fileUploadSetting.isHalf,
voiceChangerType,
params,
)
@ -460,12 +446,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
const storeToCache = (slot: number, fileUploadSetting: FileUploadSetting) => {
try {
const saveData: FileUploadSetting = {
isHalf: fileUploadSetting.isHalf, // キャッシュとしては不使用。guiで上書きされる。
uploaded: false, // キャッシュから読み込まれるときには、まだuploadされていないから。
defaultTune: fileUploadSetting.defaultTune,
defaultIndexRatio: fileUploadSetting.defaultIndexRatio,
defaultProtect: fileUploadSetting.defaultProtect,
framework: fileUploadSetting.framework,
params: fileUploadSetting.params,
mmvcv13Config: fileUploadSetting.mmvcv13Config ? { data: fileUploadSetting.mmvcv13Config.data, filename: fileUploadSetting.mmvcv13Config.filename } : null,

View File

@ -8,9 +8,10 @@
"editor.defaultFormatter": null, // Prettier 使
"editor.formatOnSave": true //
},
"python.formatting.blackArgs": ["--line-length", "550"],
"flake8.args": [
"--ignore=E501,E402,E722,E741,E203,W503"
// "--max-line-length=150",
// "--max-line-length=150"
// "--max-complexity=20"
]
}

View File

@ -3,9 +3,7 @@ class NoModeLoadedException(Exception):
self.framework = framework
def __str__(self):
return repr(
f"No model for {self.framework} loaded. Please confirm the model uploaded."
)
return repr(f"No model for {self.framework} loaded. Please confirm the model uploaded.")
class HalfPrecisionChangingException(Exception):
@ -36,3 +34,17 @@ class DeviceCannotSupportHalfPrecisionException(Exception):
class VoiceChangerIsNotSelectedException(Exception):
def __str__(self):
return repr("Voice Changer is not selected.")
class SlotConfigNotFoundException(Exception):
def __init__(self, modelDir, slotIndex):
self.modelDir = modelDir
self.slotIndex = slotIndex
def __str__(self):
return repr(f"Config for slot {self.slotIndex} is not found. (modelDir:{self.modelDir})")
class WeightDownladException(Exception):
def __str__(self):
return repr("Failed to download weight.")

View File

@ -1,4 +1,3 @@
from concurrent.futures import ThreadPoolExecutor
import sys
from distutils.util import strtobool
@ -7,24 +6,24 @@ import socket
import platform
import os
import argparse
from Downloader import download, download_no_tqdm
from voice_changer.RVC.SampleDownloader import (
checkRvcModelExist,
downloadInitialSampleModels,
)
from Exceptions import WeightDownladException
from utils.downloader.SampleDownloader import downloadInitialSamples
from utils.downloader.WeightDownloader import downloadWeight
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
import uvicorn
from mods.ssl import create_self_signed_cert
from voice_changer.VoiceChangerManager import VoiceChangerManager
from sio.MMVC_SocketIOApp import MMVC_SocketIOApp
from restapi.MMVC_Rest import MMVC_Rest
from const import (
NATIVE_CLIENT_FILE_MAC,
NATIVE_CLIENT_FILE_WIN,
SSL_KEY_DIR,
getRVCSampleJsonAndModelIds,
)
import subprocess
import multiprocessing as mp
@ -35,56 +34,23 @@ setup_loggers()
def setupArgParser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--logLevel",
type=str,
default="critical",
help="Log level info|critical. (default: critical)",
)
parser.add_argument("--logLevel", type=str, default="critical", help="Log level info|critical. (default: critical)")
parser.add_argument("-p", type=int, default=18888, help="port")
parser.add_argument("--https", type=strtobool, default=False, help="use https")
parser.add_argument(
"--httpsKey", type=str, default="ssl.key", help="path for the key of https"
)
parser.add_argument(
"--httpsCert", type=str, default="ssl.cert", help="path for the cert of https"
)
parser.add_argument(
"--httpsSelfSigned",
type=strtobool,
default=True,
help="generate self-signed certificate",
)
parser.add_argument("--httpsKey", type=str, default="ssl.key", help="path for the key of https")
parser.add_argument("--httpsCert", type=str, default="ssl.cert", help="path for the cert of https")
parser.add_argument("--httpsSelfSigned", type=strtobool, default=True, help="generate self-signed certificate")
parser.add_argument("--model_dir", type=str, help="path to model files")
parser.add_argument(
"--rvc_sample_mode", type=str, default="production", help="rvc_sample_mode"
)
parser.add_argument("--sample_mode", type=str, default="production", help="sample_mode")
parser.add_argument(
"--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)"
)
parser.add_argument(
"--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)"
)
parser.add_argument(
"--content_vec_500_onnx_on",
type=strtobool,
default=False,
help="use or not onnx for content_vec_500",
)
parser.add_argument(
"--hubert_base", type=str, help="path to hubert_base model(pytorch)"
)
parser.add_argument(
"--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)"
)
parser.add_argument(
"--hubert_soft", type=str, help="path to hubert_soft model(pytorch)"
)
parser.add_argument(
"--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)"
)
parser.add_argument("--content_vec_500", type=str, help="path to content_vec_500 model(pytorch)")
parser.add_argument("--content_vec_500_onnx", type=str, help="path to content_vec_500 model(onnx)")
parser.add_argument("--content_vec_500_onnx_on", type=strtobool, default=False, help="use or not onnx for content_vec_500")
parser.add_argument("--hubert_base", type=str, help="path to hubert_base model(pytorch)")
parser.add_argument("--hubert_base_jp", type=str, help="path to hubert_base_jp model(pytorch)")
parser.add_argument("--hubert_soft", type=str, help="path to hubert_soft model(pytorch)")
parser.add_argument("--nsf_hifigan", type=str, help="path to nsf_hifigan model(pytorch)")
return parser
@ -111,76 +77,19 @@ def printMessage(message, level=0):
print(f"\033[47m {message}\033[0m")
def downloadWeight():
# content_vec_500 = (args.content_vec_500,)
# content_vec_500_onnx = (args.content_vec_500_onnx,)
# content_vec_500_onnx_on = (args.content_vec_500_onnx_on,)
hubert_base = args.hubert_base
hubert_base_jp = args.hubert_base_jp
hubert_soft = args.hubert_soft
nsf_hifigan = args.nsf_hifigan
# file exists check (currently only for rvc)
downloadParams = []
if os.path.exists(hubert_base) is False:
downloadParams.append(
{
"url": "https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt",
"saveTo": hubert_base,
"position": 0,
}
)
if os.path.exists(hubert_base_jp) is False:
downloadParams.append(
{
"url": "https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt",
"saveTo": hubert_base_jp,
"position": 1,
}
)
if os.path.exists(hubert_soft) is False:
downloadParams.append(
{
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt",
"saveTo": hubert_soft,
"position": 2,
}
)
if os.path.exists(nsf_hifigan) is False:
downloadParams.append(
{
"url": "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin",
"saveTo": nsf_hifigan,
"position": 3,
}
)
nsf_hifigan_config = os.path.join(os.path.dirname(nsf_hifigan), "config.json")
if os.path.exists(nsf_hifigan_config) is False:
downloadParams.append(
{
"url": "https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json",
"saveTo": nsf_hifigan_config,
"position": 4,
}
)
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)
if (
os.path.exists(hubert_base) is False
or os.path.exists(hubert_base_jp) is False
or os.path.exists(hubert_soft) is False
or os.path.exists(nsf_hifigan) is False
or os.path.exists(nsf_hifigan_config) is False
):
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
printMessage("failed to download weight for rvc", level=2)
parser = setupArgParser()
args, unknown = parser.parse_known_args()
voiceChangerParams = VoiceChangerParams(
model_dir=args.model_dir,
content_vec_500=args.content_vec_500,
content_vec_500_onnx=args.content_vec_500_onnx,
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
hubert_base=args.hubert_base,
hubert_base_jp=args.hubert_base_jp,
hubert_soft=args.hubert_soft,
nsf_hifigan=args.nsf_hifigan,
sample_mode=args.sample_mode,
)
printMessage(f"Booting PHASE :{__name__}", level=2)
@ -199,24 +108,6 @@ def localServer(logLevel: str = "critical"):
if __name__ == "MMVCServerSIO":
mp.freeze_support()
voiceChangerParams = VoiceChangerParams(
model_dir=args.model_dir,
content_vec_500=args.content_vec_500,
content_vec_500_onnx=args.content_vec_500_onnx,
content_vec_500_onnx_on=args.content_vec_500_onnx_on,
hubert_base=args.hubert_base,
hubert_base_jp=args.hubert_base_jp,
hubert_soft=args.hubert_soft,
nsf_hifigan=args.nsf_hifigan,
rvc_sample_mode=args.rvc_sample_mode,
)
if (
os.path.exists(voiceChangerParams.hubert_base) is False
or os.path.exists(voiceChangerParams.hubert_base_jp) is False
):
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
printMessage("failed to download weight for rvc", level=2)
voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams)
@ -230,20 +121,16 @@ if __name__ == "__main__":
mp.freeze_support()
printMessage("Voice Changerを起動しています。", level=2)
# ダウンロード
downloadWeight()
os.makedirs(args.model_dir, exist_ok=True)
# ダウンロード(Weight)
try:
sampleJsons = []
sampleJsonUrls, sampleModels = getRVCSampleJsonAndModelIds(args.rvc_sample_mode)
for url in sampleJsonUrls:
filename = os.path.basename(url)
download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
sampleJsons.append(filename)
if checkRvcModelExist(args.model_dir) is False:
downloadInitialSampleModels(sampleJsons, sampleModels, args.model_dir)
downloadWeight(voiceChangerParams)
except WeightDownladException:
printMessage("RVC用のモデルファイルのダウンロードに失敗しました。", level=2)
printMessage("failed to download weight for rvc", level=2)
# ダウンロード(Sample)
try:
downloadInitialSamples(args.sample_mode, args.model_dir)
except Exception as e:
print("[Voice Changer] loading sample failed", e)
@ -280,9 +167,7 @@ if __name__ == "__main__":
)
key_path = os.path.join(SSL_KEY_DIR, keyname)
cert_path = os.path.join(SSL_KEY_DIR, certname)
printMessage(
f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1
)
printMessage(f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
elif args.https and args.httpsSelfSigned == 0:
# HTTPS
@ -336,16 +221,12 @@ if __name__ == "__main__":
p.start()
try:
if sys.platform.startswith("win"):
process = subprocess.Popen(
[NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"]
)
process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "-u", f"http://localhost:{PORT}/"])
return_code = process.wait()
print("client closed.")
p.terminate()
elif sys.platform.startswith("darwin"):
process = subprocess.Popen(
[NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"]
)
process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "-u", f"http://localhost:{PORT}/"])
return_code = process.wait()
print("client closed.")
p.terminate()

View File

@ -1,44 +0,0 @@
from dataclasses import dataclass, field
import json
from const import ModelType
@dataclass
class RVCModelSample:
id: str = ""
lang: str = ""
tag: list[str] = field(default_factory=lambda: [])
name: str = ""
modelUrl: str = ""
indexUrl: str = ""
termsOfUseUrl: str = ""
icon: str = ""
credit: str = ""
description: str = ""
sampleRate: int = 48000
modelType: str = ""
f0: bool = True
def getModelSamples(jsonFiles: list[str], modelType: ModelType):
try:
samples: list[RVCModelSample] = []
for file in jsonFiles:
with open(file, "r", encoding="utf-8") as f:
jsonDict = json.load(f)
modelList = jsonDict[modelType]
if modelType == "RVC":
for s in modelList:
modelSample = RVCModelSample(**s)
samples.append(modelSample)
else:
raise RuntimeError(f"Unknown model type {modelType}")
return samples
except Exception as e:
print("[Voice Changer] loading sample info error:", e)
return None

View File

@ -5,33 +5,21 @@ import tempfile
from typing import Literal, TypeAlias
ModelType: TypeAlias = Literal[
"MMVCv15",
VoiceChangerType: TypeAlias = Literal[
"MMVCv13",
"so-vits-svc-40v2",
"MMVCv15",
"so-vits-svc-40",
"so-vits-svc-40_c",
"DDSP-SVC",
"RVC",
]
ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION"
tmpdir = tempfile.TemporaryDirectory()
# print("generate tmpdir:::",tmpdir)
SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
UPLOAD_DIR = (
os.path.join(tmpdir.name, "upload_dir")
if hasattr(sys, "_MEIPASS")
else "upload_dir"
)
NATIVE_CLIENT_FILE_WIN = (
os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") # type: ignore
if hasattr(sys, "_MEIPASS")
else "voice-changer-native-client"
)
UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client" # type: ignore
NATIVE_CLIENT_FILE_MAC = (
os.path.join(
sys._MEIPASS, # type: ignore
@ -44,25 +32,12 @@ NATIVE_CLIENT_FILE_MAC = (
else "voice-changer-native-client"
)
HUBERT_ONNX_MODEL_PATH = (
os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") # type: ignore
if hasattr(sys, "_MEIPASS")
else "model_hubert/hubert_simple.onnx"
)
TMP_DIR = (
os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
)
TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
os.makedirs(TMP_DIR, exist_ok=True)
def getFrontendPath():
frontend_path = (
os.path.join(sys._MEIPASS, "dist")
if hasattr(sys, "_MEIPASS")
else "../client/demo/dist"
)
frontend_path = os.path.join(sys._MEIPASS, "dist") if hasattr(sys, "_MEIPASS") else "../client/demo/dist"
return frontend_path
@ -100,84 +75,84 @@ class ServerAudioDeviceTypes(Enum):
audiooutput = "audiooutput"
class RVCSampleMode(Enum):
    # Selects which curated sample-model catalog set the server uses.
    production = "production"
    testOfficial = "testOfficial"
    testDDPNTorch = "testDDPNTorch"
    testDDPNONNX = "testDDPNONNX"
    testONNXFull = "testONNXFull"
# Sample-catalog selection mode as a string-literal union; callers compare
# plain strings against these values.
RVCSampleMode: TypeAlias = Literal[
    "production",
    "testOfficial",
    "testDDPNTorch",
    "testDDPNONNX",
    "testONNXFull",
]
def getRVCSampleJsonAndModelIds(mode: RVCSampleMode):
if mode == RVCSampleMode.production.value:
def getSampleJsonAndModelIds(mode: RVCSampleMode):
if mode == "production":
return [
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json",
# "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
], [
("TokinaShigure_o", True),
("KikotoMahiro_o", False),
("Amitaro_o", False),
("Tsukuyomi-chan_o", False),
("TokinaShigure_o", {"useIndex": True}),
("KikotoMahiro_o", {"useIndex": False}),
("Amitaro_o", {"useIndex": False}),
("Tsukuyomi-chan_o", {"useIndex": False}),
]
elif mode == RVCSampleMode.testOfficial.value:
elif mode == "testOfficial":
return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [
("test-official-v1-f0-48k-l9-hubert_t", True),
("test-official-v1-nof0-48k-l9-hubert_t", False),
("test-official-v2-f0-40k-l12-hubert_t", False),
("test-official-v2-nof0-40k-l12-hubert_t", False),
("test-official-v1-f0-48k-l9-hubert_o", True),
("test-official-v1-nof0-48k-l9-hubert_o", False),
("test-official-v2-f0-40k-l12-hubert_o", False),
("test-official-v2-nof0-40k-l12-hubert_o", False),
("test-official-v1-f0-48k-l9-hubert_t", {"useIndex": True}),
("test-official-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
("test-official-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
("test-official-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
("test-official-v1-f0-48k-l9-hubert_o", {"useIndex": True}),
("test-official-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
("test-official-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
("test-official-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
]
elif mode == RVCSampleMode.testDDPNTorch.value:
elif mode == "testDDPNTorch":
return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [
("test-ddpn-v1-f0-48k-l9-hubert_t", False),
("test-ddpn-v1-nof0-48k-l9-hubert_t", False),
("test-ddpn-v2-f0-40k-l12-hubert_t", False),
("test-ddpn-v2-nof0-40k-l12-hubert_t", False),
("test-ddpn-v2-f0-40k-l12-hubert_jp_t", False),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", False),
("test-ddpn-v1-f0-48k-l9-hubert_t", {"useIndex": False}),
("test-ddpn-v1-nof0-48k-l9-hubert_t", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_t", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_t", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_t", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", {"useIndex": False}),
]
elif mode == RVCSampleMode.testDDPNONNX.value:
elif mode == "testDDPNONNX":
return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [
("test-ddpn-v1-f0-48k-l9-hubert_o", False),
("test-ddpn-v1-nof0-48k-l9-hubert_o", False),
("test-ddpn-v2-f0-40k-l12-hubert_o", False),
("test-ddpn-v2-nof0-40k-l12-hubert_o", False),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o", False),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", False),
("test-ddpn-v1-f0-48k-l9-hubert_o", {"useIndex": False}),
("test-ddpn-v1-nof0-48k-l9-hubert_o", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_o", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_o", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", {"useIndex": False}),
]
elif mode == RVCSampleMode.testONNXFull.value:
elif mode == "testONNXFull":
return [
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.json",
"https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.json",
], [
("test-official-v1-f0-48k-l9-hubert_o_full", False),
("test-official-v1-nof0-48k-l9-hubert_o_full", False),
("test-official-v2-f0-40k-l12-hubert_o_full", False),
("test-official-v2-nof0-40k-l12-hubert_o_full", False),
("test-ddpn-v1-f0-48k-l9-hubert_o_full", False),
("test-ddpn-v1-nof0-48k-l9-hubert_o_full", False),
("test-ddpn-v2-f0-40k-l12-hubert_o_full", False),
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", False),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", False),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", False),
("test-official-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-official-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-official-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-official-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v1-f0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-ddpn-v1-nof0-48k-l9-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
]
else:
return [], []
RVC_MODEL_DIRNAME = "rvc"  # directory name for RVC model storage — TODO confirm consumers
RVC_MAX_SLOT_NUM = 10  # NOTE(review): looks superseded by MAX_SLOT_NUM — confirm before removing
MAX_SLOT_NUM = 10  # number of model slots the server manages

View File

@ -0,0 +1,37 @@
from dataclasses import dataclass, field
from typing import TypeAlias, Union, Any
from const import VoiceChangerType
@dataclass
class ModelSample:
    # Base catalog entry; voiceChangerType discriminates the concrete subtype.
    id: str = ""
    voiceChangerType: VoiceChangerType | None = None
@dataclass
class RVCModelSample(ModelSample):
    # RVC-specific catalog entry: download URLs plus display/audio attributes.
    voiceChangerType: VoiceChangerType = "RVC"
    lang: str = ""
    tag: list[str] = field(default_factory=lambda: [])
    name: str = ""
    modelUrl: str = ""  # download URL of the model weights
    indexUrl: str = ""  # download URL of the optional index file ("" when absent)
    termsOfUseUrl: str = ""
    icon: str = ""  # download URL of the icon image ("" when absent)
    credit: str = ""
    description: str = ""
    sampleRate: int = 48000
    modelType: str = ""
    f0: bool = True  # NOTE(review): presumably "model is pitch-conditioned" — confirm
ModelSamples: TypeAlias = Union[ModelSample, RVCModelSample]
def generateModelSample(params: Any) -> ModelSamples:
    """Build the concrete sample dataclass for one raw catalog entry.

    Keys not declared on the target dataclass are dropped, so new catalog JSON
    fields cannot break instantiation (the original passed **params straight
    through for RVC and raised TypeError on unknown keys, and raised KeyError
    when "voiceChangerType" was missing).
    """
    from dataclasses import fields  # local import: keeps this edit self-contained

    def _filtered(cls):
        # Keep only keys matching a declared (including inherited) field.
        allowed = {f.name for f in fields(cls)}
        return {k: v for k, v in params.items() if k in allowed}

    if params.get("voiceChangerType") == "RVC":
        return RVCModelSample(**_filtered(RVCModelSample))
    else:
        return ModelSample(**_filtered(ModelSample))

59
server/data/ModelSlot.py Normal file
View File

@ -0,0 +1,59 @@
from typing import TypeAlias, Union
from const import EnumInferenceTypes, EnumEmbedderTypes, VoiceChangerType
from dataclasses import dataclass, asdict
import os
import json
@dataclass
class ModelSlot:
    # Base persisted slot metadata; voiceChangerType discriminates the subtype.
    voiceChangerType: VoiceChangerType | None = None
@dataclass
class RVCModelSlot(ModelSlot):
    # Persisted metadata for one RVC model slot (serialized to params.json).
    voiceChangerType: VoiceChangerType = "RVC"
    modelFile: str = ""  # path of the model weights inside the slot directory
    indexFile: str = ""  # path of the index file ("" when none)
    defaultTune: int = 0
    defaultIndexRatio: int = 1
    defaultProtect: float = 0.5
    isONNX: bool = False  # True when modelFile is an .onnx export
    modelType: str = EnumInferenceTypes.pyTorchRVC.value
    samplingRate: int = -1  # -1 until filled in by metadata generation — TODO confirm
    f0: bool = True
    embChannels: int = 256
    embOutputLayer: int = 9
    useFinalProj: bool = True
    deprecated: bool = False
    embedder: str = EnumEmbedderTypes.hubert.value
    name: str = ""
    description: str = ""
    credit: str = ""
    termsOfUseUrl: str = ""
    sampleId: str = ""  # id of the catalog sample this slot was created from
    iconFile: str = ""
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
    """Load slot metadata from <model_dir>/<slotIndex>/params.json.

    Returns an empty ModelSlot when the file is missing or the stored
    voiceChangerType is not recognized.
    """
    slotDir = os.path.join(model_dir, str(slotIndex))
    jsonFile = os.path.join(slotDir, "params.json")
    if not os.path.exists(jsonFile):
        return ModelSlot()
    # The original re-joined the path and leaked the file handle
    # (json.load(open(...))); open once via a context manager instead.
    with open(jsonFile, "r", encoding="utf-8") as f:
        jsonDict = json.load(f)
    slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
    if slotInfo.voiceChangerType == "RVC":
        return RVCModelSlot(**jsonDict)
    else:
        return ModelSlot()
def saveSlotInfo(model_dir: str, slotIndex: int, slotInfo: ModelSlots):
slotDir = os.path.join(model_dir, str(slotIndex))
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))

View File

@ -69,19 +69,14 @@ class MMVC_Rest:
StaticFiles(directory=f"{getFrontendPath()}", html=True),
name="static",
)
app_fastapi.mount(
"/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static"
)
app_fastapi.mount(
"/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static"
)
app_fastapi.mount("/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static")
app_fastapi.mount("/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static")
if sys.platform.startswith("darwin"):
p1 = os.path.dirname(sys._MEIPASS)
p2 = os.path.dirname(p1)
p3 = os.path.dirname(p2)
model_dir = os.path.join(p3, voiceChangerParams.model_dir)
print("mac model_dir:", model_dir)
app_fastapi.mount(
f"/{voiceChangerParams.model_dir}",
StaticFiles(directory=model_dir),

View File

@ -1,6 +1,4 @@
import json
import os
import shutil
from typing import Union
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
@ -10,8 +8,7 @@ from fastapi import UploadFile, File, Form
from restapi.mods.FileUploader import upload_file, concat_file_chunks
from voice_changer.VoiceChangerManager import VoiceChangerManager
from const import MODEL_DIR, UPLOAD_DIR, ModelType
from voice_changer.utils.LoadModelParams import LoadModelParams
from const import MODEL_DIR, UPLOAD_DIR, VoiceChangerType
os.makedirs(UPLOAD_DIR, exist_ok=True)
@ -24,123 +21,130 @@ class MMVC_Rest_Fileuploader:
self.router = APIRouter()
self.router.add_api_route("/info", self.get_info, methods=["GET"])
self.router.add_api_route("/performance", self.get_performance, methods=["GET"])
self.router.add_api_route(
"/upload_file", self.post_upload_file, methods=["POST"]
)
self.router.add_api_route(
"/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"]
)
self.router.add_api_route(
"/update_settings", self.post_update_settings, methods=["POST"]
)
self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
self.router.add_api_route("/update_settings", self.post_update_settings, methods=["POST"])
self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
self.router.add_api_route("/model_type", self.post_model_type, methods=["POST"])
self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
# self.router.add_api_route("/model_type", self.get_model_type, methods=["GET"])
self.router.add_api_route("/onnx", self.get_onnx, methods=["GET"])
self.router.add_api_route(
"/merge_model", self.post_merge_models, methods=["POST"]
)
self.router.add_api_route(
"/update_model_default", self.post_update_model_default, methods=["POST"]
)
self.router.add_api_route(
"/update_model_info", self.post_update_model_info, methods=["POST"]
)
self.router.add_api_route(
"/upload_model_assets", self.post_upload_model_assets, methods=["POST"]
)
self.router.add_api_route("/merge_model", self.post_merge_models, methods=["POST"])
self.router.add_api_route("/update_model_default", self.post_update_model_default, methods=["POST"])
self.router.add_api_route("/update_model_info", self.post_update_model_info, methods=["POST"])
self.router.add_api_route("/upload_model_assets", self.post_upload_model_assets, methods=["POST"])
def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
    # Store one uploaded file (or chunk) in UPLOAD_DIR under `filename`.
    try:
        res = upload_file(UPLOAD_DIR, file, filename)
        json_compatible_item_data = jsonable_encoder(res)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        # NOTE(review): the exception is swallowed and None is returned, so
        # FastAPI will surface a 500 — consider an explicit error response.
        print("[Voice Changer] ", e)
def post_concat_uploaded_file(
self, filename: str = Form(...), filenameChunkNum: int = Form(...)
):
def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
    # Reassemble a chunked upload into a single file inside UPLOAD_DIR.
    try:
        res = concat_file_chunks(UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
        json_compatible_item_data = jsonable_encoder(res)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def get_info(self):
    """Return the current voice changer status/settings as a JSON response."""
    try:
        currentInfo = self.voiceChangerManager.get_info()
        payload = jsonable_encoder(currentInfo)
        return JSONResponse(content=payload)
    except Exception as e:
        print("[Voice Changer] ", e)
def get_performance(self):
    # Return current performance metrics from the voice changer manager.
    try:
        info = self.voiceChangerManager.get_performance()
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def post_update_settings(
self, key: str = Form(...), val: Union[int, str, float] = Form(...)
):
def post_update_settings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
    # Update a single server-side configuration value by key.
    try:
        print("[Voice Changer] update configuration:", key, val)
        info = self.voiceChangerManager.update_settings(key, val)
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def post_load_model(
self,
slot: int = Form(...),
isHalf: bool = Form(...),
voiceChangerType: str = Form(...),
params: str = Form(...),
):
paramDict = json.loads(params)
# print("paramDict", paramDict)
# Change Filepath
newFilesDict = {}
for key, val in paramDict["files"].items():
if val != "-" and val != "":
uploadPath = os.path.join(UPLOAD_DIR, val)
storePath = os.path.join(UPLOAD_DIR, f"{slot}", val)
storeDir = os.path.dirname(storePath)
os.makedirs(storeDir, exist_ok=True)
shutil.move(uploadPath, storePath)
newFilesDict[key] = storePath
paramDict["files"] = newFilesDict
props: LoadModelParams = LoadModelParams(
slot=slot, isHalf=isHalf, params=paramDict
)
info = self.voiceChangerManager.loadModel(props)
try:
info = self.voiceChangerManager.loadModel(slot, voiceChangerType, params)
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
except Exception as e:
print("[Voice Changer] ", e)
def post_model_type(self, modelType: ModelType = Form(...)):
info = self.voiceChangerManager.switchModelType(modelType)
json_compatible_item_data = jsonable_encoder(info)
def post_model_type(self, modelType: VoiceChangerType = Form(...)):
    # Model-type switching is disabled in the refactor; always report ok.
    try:
        # info = self.voiceChangerManager.switchModelType(modelType)
        # json_compatible_item_data = jsonable_encoder(info)
        json_compatible_item_data = jsonable_encoder({"status": "ok"})
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def get_model_type(self):
info = self.voiceChangerManager.getModelType()
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
# def get_model_type(self):
# try:
# # info = self.voiceChangerManager.getModelType()
# # json_compatible_item_data = jsonable_encoder(info)
# print(
# "-------------- get_model_type",
# )
# json_compatible_item_data = jsonable_encoder({"status": "ok"})
# return JSONResponse(content=json_compatible_item_data)
# except Exception as e:
# print("[Voice Changer] ", e)
def get_onnx(self):
    # Trigger ONNX export of the current model and return the result info.
    try:
        info = self.voiceChangerManager.export2onnx()
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def post_merge_models(self, request: str = Form(...)):
    # Merge models according to the JSON request string posted by the client.
    try:
        print(request)
        info = self.voiceChangerManager.merge_models(request)
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def post_update_model_default(self):
    # Persist the current runtime settings as the model's defaults.
    try:
        info = self.voiceChangerManager.update_model_default()
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def post_update_model_info(self, newData: str = Form(...)):
    # Update model metadata with the posted JSON string.
    try:
        info = self.voiceChangerManager.update_model_info(newData)
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)
def post_upload_model_assets(self, params: str = Form(...)):
    # Attach uploaded assets (e.g. icons) to a model slot, per `params`.
    try:
        info = self.voiceChangerManager.upload_model_assets(params)
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
    except Exception as e:
        print("[Voice Changer] ", e)

View File

@ -1,14 +0,0 @@
from fastapi.responses import FileResponse
import os
def mod_get_model(modelFile: str):
    # Serve a trained model file from the MMVC trainer log directory.
    # NOTE(review): modelFile is joined unchecked — path traversal is possible
    # if it can contain "../"; confirm the caller sanitizes it.
    modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
    return FileResponse(path=modelPath)
def mod_delete_model(modelFile: str):
    """Delete the given model file from the MMVC trainer log directory."""
    target = os.path.join("MMVC_Trainer/logs", modelFile)
    os.remove(target)
    return {"Model deleted": f"{modelFile}"}

View File

@ -1,23 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_models():
    # List generator/discriminator checkpoints and configs found in the
    # trainer log directory, returned as sorted basenames.
    gModels = get_file_list(f'MMVC_Trainer/logs/G*.pth')
    dModels = get_file_list(f'MMVC_Trainer/logs/D*.pth')
    configs = get_file_list(f'MMVC_Trainer/logs/config.json')
    models = []
    models.extend(gModels)
    models.extend(dModels)
    models.extend(configs)
    models = [ os.path.basename(x) for x in models]
    models = sorted(models)
    data = {
        "models":models
    }
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)

View File

@ -1,26 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os
MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt"
def mod_get_multi_speaker_setting():
    """Return the multi-speaker correspondence file contents as JSON,
    creating an empty file first when it does not exist yet."""
    data = {}
    if not os.path.isfile(MULTI_SPEAKER_SETTING_PATH):
        # The with-statement flushes and closes on exit; the original's
        # explicit flush()/close() calls were redundant.
        with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
            f.write("")
    with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
        setting = f.read()
    data["multi_speaker_setting"] = setting
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)
def mod_post_multi_speaker_setting(setting: str):
    """Overwrite the multi-speaker correspondence file with `setting`.

    Uses a context manager so the handle is flushed and closed even on error
    (the original opened without `with` and flushed/closed manually).
    """
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.write(setting)
    return {"Write Multispeaker setting": f"{setting}"}

View File

@ -1,15 +0,0 @@
import shutil
from restapi.mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH
def mod_delete_speaker(speaker: str):
    """Remove a speaker's dataset directory and its line from the
    multi-speaker correspondence file."""
    shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}")
    with open(MULTI_SPEAKER_SETTING_PATH, "r") as f:
        setting = f.readlines()
    # Drop the "speaker|..." line (original used startswith(...) == False).
    kept = [line for line in setting if not line.startswith(f"{speaker}|")]
    with open(MULTI_SPEAKER_SETTING_PATH, "w") as f:
        f.writelines(kept)
    return {"Speaker deleted": f"{speaker}"}

View File

@ -1,28 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
import os, base64
def mod_get_speaker_voice(speaker:str, voice:str):
    # Return one recording for a speaker: wav (base64), raw text and readable
    # text — each included only when the corresponding file exists.
    wav_file = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav'
    text_file = f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt'
    readable_text_file = f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt'
    data = {}
    if os.path.exists(wav_file):
        with open(wav_file, "rb") as f:
            wav_data = f.read()
        # base64-encode so the audio can travel inside a JSON payload
        wav_data_base64 = base64.b64encode(wav_data).decode('utf-8')
        data["wav"] = wav_data_base64
    if os.path.exists(text_file):
        with open(text_file, "r") as f:
            text_data = f.read()
        data["text"] = text_data
    if os.path.exists(readable_text_file):
        with open(readable_text_file, "r") as f:
            text_data = f.read()
        data["readable_text"] = text_data
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)

View File

@ -1,22 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_file_list
import os
def mod_get_speaker_voices(speaker:str):
    # List the recording ids for a speaker: union of wav/text/readable_text
    # basenames (without extension), deduplicated and sorted.
    voices = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/wav/*.wav')
    texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/text/*.txt')
    readable_texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/*.txt')
    items = voices
    items.extend(texts)
    items.extend(readable_texts)
    items = [ os.path.splitext(os.path.basename(x))[0] for x in items]
    items = sorted(set(items))
    data = {
        "voices":items
    }
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)

View File

@ -1,15 +0,0 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from restapi.utils.files import get_dir_list
import os
# Speaker creation is implemented in FileUploader.
def mod_get_speakers():
    # List the speaker directories found under the trainer dataset root
    # (creating the root first so a fresh checkout returns an empty list).
    os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True)
    speakers = get_dir_list("MMVC_Trainer/dataset/textful/")
    data = {
        "speakers":sorted(speakers)
    }
    json_compatible_item_data = jsonable_encoder(data)
    return JSONResponse(content=json_compatible_item_data)

View File

@ -1,176 +0,0 @@
import subprocess,os
from restapi.utils.files import get_file_list
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
LOG_DIR = "info"

# Handle to the currently running training subprocess (None when idle).
train_proc = None

# Shared return-status codes for the helpers below.
SUCCESS = 0
ERROR = -1


### Submodule for Pre train
def sync_exec(cmd: str, log_path: str, cwd=None):
    """Run `cmd` in a shell synchronously, sending stdout/stderr to log_path.

    Returns (SUCCESS, "success") when the command exits 0, otherwise
    (ERROR, detail).
    """
    shortCmdStr = cmd[:20]
    try:
        with open(log_path, 'w') as log_file:
            # subprocess.run treats cwd=None as "inherit the current dir",
            # so one call replaces the original's duplicated if/else branches.
            proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd=cwd)
            print(f"{shortCmdStr} returncode:{proc.returncode}")
            if proc.returncode != 0:
                print(f"{shortCmdStr} exception:")
                return (ERROR, f"returncode:{proc.returncode}")
    except Exception as e:
        print(f"{shortCmdStr} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, "success")
def sync_exec_with_stdout(cmd:str, log_path:str):
    # Like sync_exec, but captures stdout for the caller (stderr still goes to
    # log_path) and always runs inside the MMVC_Trainer working directory.
    shortCmdStr = cmd[:20]
    try:
        with open(log_path, 'w') as log_file:
            proc = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE,
                            stderr=log_file, cwd="MMVC_Trainer")
            print(f"STDOUT{shortCmdStr}",proc.stdout)
    except Exception as e:
        print(f"{shortCmdStr} exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, proc.stdout)
def create_dataset():
    # Run the trainer's dataset-generation script inside MMVC_Trainer,
    # logging to info/log_create_dataset_jtalk.txt.
    cmd = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt"
    log_file = os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt")
    res = sync_exec(cmd, log_file, "MMVC_Trainer")
    return res
def set_batch_size(batch:int):
    # Patch "batch_size" in the trainer base config in place via sed.
    # `batch` is typed int, which keeps the interpolated command safe here.
    cmd = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json"
    log_file = os.path.join(LOG_DIR, "log_set_batch_size.txt")
    res = sync_exec(cmd, log_file)
    return res
def set_dummy_device_count():
    # Force the trainer to see a single device by rewriting
    # torch.cuda.device_count() to the literal 1 in train_ms.py.
    cmd = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py'
    log_file = os.path.join(LOG_DIR, "log_set_dummy_device_count.txt")
    res = sync_exec(cmd, log_file)
    return res
### Submodule for Train
def exec_training(enable_finetuning: bool, GModel: str, DModel: str):
    # Launch the training script as a background subprocess, optionally
    # fine-tuning from the given generator/discriminator checkpoints.
    # Returns (SUCCESS, "success") or (ERROR, detail).
    global train_proc
    log_file = os.path.join(LOG_DIR, "training.txt")
    # Check whether training is already running (avoid a double start).
    if train_proc != None:
        status = train_proc.poll()
        if status != None:
            print("Training have ended.", status)
            train_proc = None
        else:
            print("Training have stated.")
            return (ERROR, "Training have started")
    try:
        with open(log_file, 'w') as log_file:
            if enable_finetuning == True:
                # cwd is set on the subprocess below, so the relative "logs"
                # folder is sufficient here.
                GModelPath = os.path.join("logs", GModel)
                DModelPath = os.path.join("logs", DModel)
                cmd = f'python3 train_ms.py -c configs/train_config.json -m ./ -fg {GModelPath} -fd {DModelPath}'
            else:
                cmd = 'python3 train_ms.py -c configs/train_config.json -m ./'
            print("exec:",cmd)
            # "exec " makes the shell replace itself, so kill() reaches python3.
            train_proc = subprocess.Popen("exec "+cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer")
            print("Training stated")
            print(f"returncode:{train_proc.returncode}")
    except Exception as e:
        print("start training exception:", str(e))
        return (ERROR, str(e))
    return (SUCCESS, "success")
def stop_training():
    """Stop the running training subprocess, if any.

    Returns (SUCCESS, "success") or (ERROR, detail).
    """
    global train_proc
    if train_proc is None:
        print("Training have not stated.")
        return (ERROR, "Training have not stated.")
    status = train_proc.poll()
    if status is not None:
        print("Training have already ended.", status)
        train_proc = None
        # poll() returns an int returncode; the original concatenated it to a
        # str directly, which raised TypeError on this path.
        return (ERROR, "Training have already ended. " + str(status))
    else:
        train_proc.kill()
        print("Training have stoped.")
        return (SUCCESS, "success")
### Main
def mod_post_pre_training(batch:int):
    # Run the full preprocessing pipeline: set batch size, pin device count
    # to one, then build the dataset. Stops at the first failing step.
    res = set_batch_size(batch)
    if res[0] == ERROR:
        return {"result":"failed", "detail": f"Preprocess(set_batch_size) failed. {res[1]}"}
    res = set_dummy_device_count()
    if res[0] == ERROR:
        return {"result":"failed", "detail": f"Preprocess(set_dummy_device_count) failed. {res[1]}"}
    res = create_dataset()
    if res[0] == ERROR:
        return {"result":"failed", "detail": f"Preprocess failed(create_dataset). {res[1]}"}
    return {"result":"success", "detail": f"Preprocess succeeded. {res[1]}"}
def mod_post_start_training(enable_finetuning:str, GModel:str, DModel:str):
    # Kick off training; wraps exec_training into a result/detail dict.
    # NOTE(review): enable_finetuning is annotated str but exec_training
    # expects bool — confirm how the caller passes it.
    print("START_TRAINING:::::::", enable_finetuning, GModel, DModel)
    res = exec_training(enable_finetuning, GModel, DModel)
    if res[0] == ERROR:
        return {"result":"failed", "detail": f"Start training failed. {res[1]}"}
    return {"result":"success", "detail": f"Start training succeeded. {res[1]}"}
def mod_post_stop_training():
    # Stop training; wraps stop_training into a result/detail dict.
    res = stop_training()
    if res[0] == ERROR:
        return {"result":"failed", "detail": f"Stop training failed. {res[1]}"}
    return {"result":"success", "detail": f"Stop training succeeded. {res[1]}"}
### DEBUG
def mod_get_related_files():
    # Collect training-related files (info logs, correspondence table,
    # train script, configs) with name/size/contents, for UI-side debugging.
    files = get_file_list(os.path.join(LOG_DIR,"*"))
    files.extend([
        "MMVC_Trainer/dataset/multi_speaker_correspondence.txt",
        "MMVC_Trainer/train_ms.py",
    ])
    files.extend(
        get_file_list("MMVC_Trainer/configs/*")
    )
    res = []
    for f in files:
        size = os.path.getsize(f)
        data = ""
        # Only inline contents of files smaller than 1 MiB.
        if size < 1024*1024:
            with open(f, "r") as input:
                data = input.read()
        res.append({
            "name":f,
            "size":size,
            "data":data
        })
    json_compatible_item_data = jsonable_encoder(res)
    return JSONResponse(content=json_compatible_item_data)
def mod_get_tail_training_log(num:int):
    # Return the last `num` lines of the training log, after stripping
    # carriage-return progress updates into /tmp/out.
    training_log_file = os.path.join(LOG_DIR, "training.txt")
    # NOTE(review): the result of this first sync_exec is discarded (it is
    # immediately overwritten below) — confirm failures here can be ignored.
    res = sync_exec(f"cat {training_log_file} | sed -e 's/.*\r//' > /tmp/out","/dev/null")
    cmd = f'tail -n {num} /tmp/out'
    res = sync_exec_with_stdout(cmd, "/dev/null")
    if res[0] == ERROR:
        return {"result":"failed", "detail": f"Tail training log failed. {res[1]}"}
    return {"result":"success", "detail":res[1]}

View File

@ -1,26 +0,0 @@
import os
import glob
# def get_file_list(top_dir):
# for root, dirs, files in os.walk(top_dir):
# for dir in dirs:
# dirPath = os.path.join(root, dir)
# print(f'dirPath = {dirPath}')
# for file in files:
# filePath = os.path.join(root, file)
# print(f'filePath = {filePath}')
def get_dir_list(top_dir):
    """Return the names of the immediate subdirectories of top_dir."""
    entries = os.listdir(top_dir)
    return [name for name in entries if os.path.isdir(os.path.join(top_dir, name))]
def get_file_list(top_dir):
    """Expand the given glob pattern to the list of matching paths."""
    matched = glob.glob(top_dir)
    return matched

View File

@ -0,0 +1,165 @@
import json
import os
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Tuple
from const import RVCSampleMode, getSampleJsonAndModelIds
from data.ModelSample import ModelSamples, generateModelSample
from data.ModelSlot import RVCModelSlot, loadSlotInfo, saveSlotInfo
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
from utils.downloader.Downloader import download, download_no_tqdm
def downloadInitialSamples(mode: RVCSampleMode, model_dir: str):
    """Download the sample catalogs and, on first run, the initial models.

    The catalog JSONs are always refreshed; the model download itself is
    skipped when model_dir already exists.
    """
    sampleJsonUrls, sampleModels = getSampleJsonAndModelIds(mode)
    sampleJsons = _downloadSampleJsons(sampleJsonUrls)
    if os.path.exists(model_dir):
        # fixed log-message typo ("skil" -> "skip")
        print("[Voice Changer] model_dir is already exists. skip download samples.")
        return
    samples = _generateSampleList(sampleJsons)
    slotIndex = list(range(len(sampleModels)))
    _downloadSamples(samples, sampleModels, model_dir, slotIndex)
def downloadSample(mode: RVCSampleMode, modelId: str, model_dir: str, slotIndex: int, params: Any):
    # Download a single catalog sample (selected by id) into one slot.
    sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
    # Assumes the catalog JSONs were already downloaded (only maps URLs to
    # local filenames here).
    sampleJsons = _generateSampleJsons(sampleJsonUrls)
    samples = _generateSampleList(sampleJsons)
    _downloadSamples(samples, [(modelId, params)], model_dir, [slotIndex])
    pass
def getSampleInfos(mode: RVCSampleMode):
    # Parse the locally cached catalog JSONs for the mode into sample objects.
    sampleJsonUrls, _sampleModels = getSampleJsonAndModelIds(mode)
    sampleJsons = _generateSampleJsons(sampleJsonUrls)
    samples = _generateSampleList(sampleJsons)
    return samples
def _downloadSampleJsons(sampleJsonUrls: list[str]):
    # Download every catalog JSON into the working directory (named by its
    # URL basename) and return the local filenames.
    sampleJsons = []
    for url in sampleJsonUrls:
        filename = os.path.basename(url)
        download_no_tqdm({"url": url, "saveTo": filename, "position": 0})
        sampleJsons.append(filename)
    return sampleJsons
def _generateSampleJsons(sampleJsonUrls: list[str]):
sampleJsons = []
for url in sampleJsonUrls:
filename = os.path.basename(url)
sampleJsons.append(filename)
return sampleJsons
def _generateSampleList(sampleJsons: list[str]):
    # Parse every catalog JSON into typed sample objects. Top-level keys are
    # voice changer types; each maps to a list of raw sample dicts.
    samples: list[ModelSamples] = []
    for file in sampleJsons:  # NOTE(review): `file` shadows the builtin
        with open(file, "r", encoding="utf-8") as f:
            jsonDict = json.load(f)
        for vcType in jsonDict:
            for sampleParams in jsonDict[vcType]:
                sample = generateModelSample(sampleParams)
                samples.append(sample)
    return samples
def _downloadSamples(samples: list[ModelSamples], sampleModelIds: list[Tuple[str, Any]], model_dir: str, slotIndex: list[int]):
    # Resolve each requested sample id against the catalog, queue its file
    # downloads, run them in parallel, then generate per-slot model metadata.
    downloadParams = []
    line_num = 0  # progress-bar position assigned to each queued download
    for i, initSampleId in enumerate(sampleModelIds):
        targetSampleId = initSampleId[0]
        targetSampleParams = initSampleId[1]
        tagetSlotIndex = slotIndex[i]
        # Search the catalog for the requested initial sample.
        match = False
        for sample in samples:
            print("sample", sample)
            if sample.id == targetSampleId:
                match = True
                break
        if match is False:
            print(f"[Voice Changer] initiail sample not found. {targetSampleId}")
            continue
        # Found: prepare the slot directory and queue the downloads.
        slotDir = os.path.join(model_dir, str(tagetSlotIndex))
        if sample.voiceChangerType == "RVC":
            slotInfo: RVCModelSlot = RVCModelSlot()
            os.makedirs(slotDir, exist_ok=True)
            # Model weights (always downloaded).
            modelFilePath = os.path.join(
                slotDir,
                os.path.basename(sample.modelUrl),
            )
            downloadParams.append(
                {
                    "url": sample.modelUrl,
                    "saveTo": modelFilePath,
                    "position": line_num,
                }
            )
            slotInfo.modelFile = modelFilePath
            line_num += 1
            # Optional index file (only when requested and provided).
            if targetSampleParams["useIndex"] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
                indexPath = os.path.join(
                    slotDir,
                    os.path.basename(sample.indexUrl),
                )
                downloadParams.append(
                    {
                        "url": sample.indexUrl,
                        "saveTo": indexPath,
                        "position": line_num,
                    }
                )
                slotInfo.indexFile = indexPath
                line_num += 1
            # Optional icon image.
            if hasattr(sample, "icon") and sample.icon != "":
                iconPath = os.path.join(
                    slotDir,
                    os.path.basename(sample.icon),
                )
                downloadParams.append(
                    {
                        "url": sample.icon,
                        "saveTo": iconPath,
                        "position": line_num,
                    }
                )
                slotInfo.iconFile = iconPath
                line_num += 1
            # Copy catalog display metadata and defaults into the slot.
            slotInfo.sampleId = sample.id
            slotInfo.credit = sample.credit
            slotInfo.description = sample.description
            slotInfo.name = sample.name
            slotInfo.termsOfUseUrl = sample.termsOfUseUrl
            slotInfo.defaultTune = 0
            slotInfo.defaultIndexRatio = 1
            slotInfo.defaultProtect = 0.5
            slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
            saveSlotInfo(model_dir, tagetSlotIndex, slotInfo)
    # Download all queued files in parallel.
    print("[Voice Changer] Downloading model files...")
    with ThreadPoolExecutor() as pool:
        pool.map(download, downloadParams)
    # Generate metadata for each populated slot from the downloaded files.
    print("[Voice Changer] Generating metadata...")
    for targetSlotIndex in slotIndex:
        slotInfo = loadSlotInfo(model_dir, targetSlotIndex)
        if slotInfo.voiceChangerType == "RVC":
            if slotInfo.isONNX:
                _setInfoByONNX(slotInfo)
            else:
                _setInfoByPytorch(slotInfo)
            saveSlotInfo(model_dir, targetSlotIndex, slotInfo)

View File

@ -0,0 +1,64 @@
import os
from concurrent.futures import ThreadPoolExecutor
from utils.downloader.Downloader import download
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from Exceptions import WeightDownladException
def downloadWeight(voiceChangerParams: VoiceChangerParams):
    """Fetch the pretrained embedder / vocoder weight files that are missing on disk.

    Checks each configured weight path; any file that does not exist yet is
    downloaded in parallel. Raises WeightDownladException (project spelling)
    if any required file is still absent afterwards.

    :param voiceChangerParams: global parameters holding the weight file paths.
    """
    nsf_hifigan = voiceChangerParams.nsf_hifigan
    # The NSF-HiFiGAN vocoder also needs its config.json next to the model file.
    nsf_hifigan_config = os.path.join(os.path.dirname(nsf_hifigan), "config.json")

    # (destination path, download URL) in a fixed order — the list index doubles
    # as the tqdm progress-bar "position", matching the original 0..4 layout.
    weightFiles = [
        (voiceChangerParams.hubert_base, "https://huggingface.co/ddPn08/rvc-webui-models/resolve/main/embeddings/hubert_base.pt"),
        (voiceChangerParams.hubert_base_jp, "https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt"),
        (voiceChangerParams.hubert_soft, "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/embedder/hubert-soft-0d54a1f4.pt"),
        (nsf_hifigan, "https://huggingface.co/wok000/weights/resolve/main/ddsp-svc30/nsf_hifigan_20221211/model.bin"),
        (nsf_hifigan_config, "https://huggingface.co/wok000/weights/raw/main/ddsp-svc30/nsf_hifigan_20221211/config.json"),
    ]

    # file exists check (currently only for rvc)
    downloadParams = []
    for position, (saveTo, url) in enumerate(weightFiles):
        if not os.path.exists(saveTo):
            downloadParams.append(
                {
                    "url": url,
                    "saveTo": saveTo,
                    "position": position,
                }
            )

    # Download every missing file concurrently.
    with ThreadPoolExecutor() as pool:
        pool.map(download, downloadParams)

    # Verify all weights are present now; abort startup otherwise.
    if any(not os.path.exists(path) for path, _url in weightFiles):
        raise WeightDownladException()

View File

@ -0,0 +1,134 @@
import time
import numpy as np
import librosa
import sounddevice as sd
from voice_changer.Local.AudioDeviceList import ServerAudioDevice
from voice_changer.VoiceChanger import VoiceChanger
from voice_changer.utils.Timer import Timer
class ServerDevice:
    """Runs the voice changer directly against local (server-side) audio devices
    through the ``sounddevice`` library.

    NOTE(review): the diff renderer stripped all indentation from this hunk;
    block structure below is reconstructed conventionally — confirm against the
    repository before relying on exact nesting.
    """

    def __init__(self):
        # Bound later by serverLocal(); the audio callback is a no-op until then.
        self.voiceChanger: VoiceChanger | None = None
        pass

    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
        """sounddevice duplex-stream callback: convert ``indata`` and write the
        result into ``outdata`` in place. Exceptions are swallowed and logged so
        the audio stream never dies inside the callback."""
        if self.voiceChanger is None:
            print("[Voice Changer] voiceChanger is None")
            return
        try:
            # Apply input gain before conversion.
            indata = indata * self.voiceChanger.settings.serverInputAudioGain
            with Timer("all_inference_time") as t:
                # Mix down to mono and rescale float [-1, 1] to int16 range.
                unpackedData = librosa.to_mono(indata.T) * 32768.0
                out_wav, times = self.voiceChanger.on_request(unpackedData)
                # Duplicate the mono result across all output channels and
                # rescale back to float range. ("Chunnels" typo kept as-is.)
                outputChunnels = outdata.shape[1]
                outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
                outdata[:] = outdata * self.voiceChanger.settings.serverOutputAudioGain
            all_inference_time = t.secs
            # performance = [total time, *per-stage times]; published in ms.
            performance = [all_inference_time] + times
            if self.voiceChanger.emitTo is not None:
                self.voiceChanger.emitTo(performance)
            self.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
        except Exception as e:
            # Never let an exception escape the realtime callback; log and continue.
            print("[Voice Changer] ex:", e)

    def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
        """Return the device whose ``index`` matches, or None when absent."""
        serverAudioDevice = [x for x in audioDeviceList if x.index == index]
        if len(serverAudioDevice) > 0:
            return serverAudioDevice[0]
        else:
            return None

    def serverLocal(self, _vc: VoiceChanger):
        """Blocking server-mode loop: open a duplex stream on the configured
        devices and keep it alive, re-opening whenever the selected devices,
        chunk size or model sampling rate change."""
        self.voiceChanger = _vc
        vc = self.voiceChanger
        # Snapshot of the settings the current stream was opened with; any
        # mismatch against vc.settings makes the inner loop exit and re-open.
        currentInputDeviceId = -1
        currentModelSamplingRate = -1
        currentOutputDeviceId = -1
        currentInputChunkNum = -1
        while True:
            if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc is None:
                # Server audio disabled or no input device selected: idle.
                # NOTE(review): `vc is None` can never be True here (vc was
                # assigned just above) and is tested after attribute access.
                vc.settings.inputSampleRate = 48000
                time.sleep(2)
            else:
                # Restart PortAudio so newly plugged devices are picked up.
                sd._terminate()
                sd._initialize()
                sd.default.device[0] = vc.settings.serverInputDeviceId
                currentInputDeviceId = vc.settings.serverInputDeviceId
                sd.default.device[1] = vc.settings.serverOutputDeviceId
                currentOutputDeviceId = vc.settings.serverOutputDeviceId
                currentInputChannelNum = vc.settings.serverAudioInputDevices  # NOTE(review): immediately overwritten below; looks vestigial
                serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
                serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
                print(serverInputAudioDevice, serverOutputAudioDevice)
                if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                    time.sleep(2)
                    print("serverInputAudioDevice or serverOutputAudioDevice is None")
                    continue
                currentInputChannelNum = serverInputAudioDevice.maxInputChannels
                currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
                currentInputChunkNum = vc.settings.serverReadChunkSize
                block_frame = currentInputChunkNum * 128  # frames per callback (hop size 128)
                # sample rate precheck(alsa cannot use 40000?)
                try:
                    currentModelSamplingRate = self.voiceChanger.voiceChangerModel.get_processing_sampling_rate()
                except Exception as e:
                    print("[Voice Changer] ex: get_processing_sampling_rate", e)
                    continue
                try:
                    # Probe: try opening a stream at the model's own sampling rate.
                    with sd.Stream(
                        callback=self.audio_callback,
                        blocksize=block_frame,
                        samplerate=currentModelSamplingRate,
                        dtype="float32",
                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        pass
                    vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
                    vc.settings.inputSampleRate = currentModelSamplingRate
                    print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
                except Exception as e:
                    print(
                        "[Voice Changer] ex: fallback to device default samplerate",
                        e,
                    )
                    vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
                    vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
                # main loop
                try:
                    with sd.Stream(
                        callback=self.audio_callback,
                        blocksize=block_frame,
                        samplerate=vc.settings.serverInputAudioSampleRate,
                        dtype="float32",
                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        # Keep the stream alive while nothing relevant has changed.
                        while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.voiceChangerModel.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
                            time.sleep(2)
                            print(
                                "[Voice Changer] server audio",
                                vc.settings.performance,
                            )
                            print(
                                "[Voice Changer] info:",
                                vc.settings.serverAudioStated,
                                currentInputDeviceId,
                                currentOutputDeviceId,
                                vc.settings.serverInputAudioSampleRate,
                                currentInputChunkNum,
                            )
                except Exception as e:
                    print("[Voice Changer] ex:", e)
                time.sleep(2)

View File

@ -3,26 +3,26 @@ from const import EnumInferenceTypes, EnumEmbedderTypes
from dataclasses import dataclass
@dataclass
class ModelSlot:
modelFile: str = ""
indexFile: str = ""
defaultTune: int = 0
defaultIndexRatio: int = 1
defaultProtect: float = 0.5
isONNX: bool = False
modelType: str = EnumInferenceTypes.pyTorchRVC.value
samplingRate: int = -1
f0: bool = True
embChannels: int = 256
embOutputLayer: int = 9
useFinalProj: bool = True
deprecated: bool = False
embedder: str = EnumEmbedderTypes.hubert.value
# @dataclass
# class ModelSlot:
# modelFile: str = ""
# indexFile: str = ""
# defaultTune: int = 0
# defaultIndexRatio: int = 1
# defaultProtect: float = 0.5
# isONNX: bool = False
# modelType: str = EnumInferenceTypes.pyTorchRVC.value
# samplingRate: int = -1
# f0: bool = True
# embChannels: int = 256
# embOutputLayer: int = 9
# useFinalProj: bool = True
# deprecated: bool = False
# embedder: str = EnumEmbedderTypes.hubert.value
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
# name: str = ""
# description: str = ""
# credit: str = ""
# termsOfUseUrl: str = ""
# sampleId: str = ""
# iconFile: str = ""

View File

@ -1,9 +1,45 @@
from const import EnumEmbedderTypes, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
from const import UPLOAD_DIR, EnumEmbedderTypes, EnumInferenceTypes
import torch
import onnxruntime
import json
import os
import shutil
from data.ModelSlot import ModelSlot, RVCModelSlot, saveSlotInfo
def setSlotAsRVC(model_dir: str, slot: int, paramDict):
    """Register uploaded RVC model files into model slot ``slot``.

    Moves the uploaded files from UPLOAD_DIR into the slot directory, fills an
    RVCModelSlot with default inference parameters, derives model metadata
    (PyTorch vs ONNX) and persists everything via saveSlotInfo.

    NOTE(review): the diff renderer stripped indentation; ``shutil.move`` is
    placed at loop-body level (so files of unknown kind are still moved into
    the slot directory) — confirm against the repository.
    """
    slotInfo: RVCModelSlot = RVCModelSlot()
    slotDir = os.path.join(model_dir, str(slot))
    os.makedirs(slotDir, exist_ok=True)
    print("RVC SLot Load", slot, paramDict)
    for f in paramDict["files"]:
        srcPath = os.path.join(UPLOAD_DIR, f["name"])
        dstPath = os.path.join(slotDir, f["name"])
        if f["kind"] == "rvcModel":
            slotInfo.modelFile = dstPath
            slotInfo.name = os.path.splitext(f["name"])[0]  # model name = file stem
        elif f["kind"] == "rvcIndex":
            slotInfo.indexFile = dstPath
        else:
            print(f"[Voice Changer] unknown file kind {f['kind']}")
        # Relocate the uploaded file into its permanent slot directory.
        shutil.move(srcPath, dstPath)
    # Default inference parameters for a freshly registered model.
    slotInfo.defaultTune = 0
    slotInfo.defaultIndexRatio = 1
    slotInfo.defaultProtect = 0.5
    slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
    # Populate embedder / sampling-rate metadata by inspecting the model file.
    if slotInfo.isONNX:
        _setInfoByONNX(slotInfo)
    else:
        _setInfoByPytorch(slotInfo)
    saveSlotInfo(model_dir, slot, slotInfo)
    print("[Voice Changer] new model added:", slotInfo)
def _setInfoByPytorch(slot: ModelSlot):
@ -15,22 +51,14 @@ def _setInfoByPytorch(slot: ModelSlot):
slot.f0 = True if cpt["f0"] == 1 else False
version = cpt.get("version", "v1")
if version is None or version == "v1":
slot.modelType = (
EnumInferenceTypes.pyTorchRVC.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCNono.value
)
slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
slot.embChannels = 256
slot.embOutputLayer = 9
slot.useFinalProj = True
slot.embedder = EnumEmbedderTypes.hubert.value
print("[Voice Changer] Official Model(pyTorch) : v1")
else:
slot.modelType = (
EnumInferenceTypes.pyTorchRVCv2.value
if slot.f0
else EnumInferenceTypes.pyTorchRVCv2Nono.value
)
slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
slot.embChannels = 768
slot.embOutputLayer = 12
slot.useFinalProj = False
@ -40,37 +68,21 @@ def _setInfoByPytorch(slot: ModelSlot):
else:
# DDPN RVC
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = (
EnumInferenceTypes.pyTorchWebUI.value
if slot.f0
else EnumInferenceTypes.pyTorchWebUINono.value
)
slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
slot.embChannels = cpt["config"][17]
slot.embOutputLayer = (
cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
)
slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
if slot.embChannels == 256:
slot.useFinalProj = True
else:
slot.useFinalProj = False
# DDPNモデルの情報を表示
if (
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
elif (
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
else:
print(
f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
@ -91,9 +103,8 @@ def _setInfoByPytorch(slot: ModelSlot):
def _setInfoByONNX(slot: ModelSlot):
tmp_onnx_session = onnxruntime.InferenceSession(
slot.modelFile, providers=["CPUExecutionProvider"]
)
print("......................................_setInfoByONNX")
tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
modelmeta = tmp_onnx_session.get_modelmeta()
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
@ -101,16 +112,8 @@ def _setInfoByONNX(slot: ModelSlot):
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
slot.embOutputLayer = (
metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
)
slot.useFinalProj = (
metadata["useFinalProj"]
if "useFinalProj" in metadata
else True
if slot.embChannels == 256
else False
)
slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
if slot.embChannels == 256:
slot.useFinalProj = True
@ -118,22 +121,12 @@ def _setInfoByONNX(slot: ModelSlot):
slot.useFinalProj = False
# ONNXモデルの情報を表示
if (
slot.embChannels == 256
and slot.embOutputLayer == 9
and slot.useFinalProj is True
):
if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
print("[Voice Changer] ONNX Model: Official v1 like")
elif (
slot.embChannels == 768
and slot.embOutputLayer == 12
and slot.useFinalProj is False
):
elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
print("[Voice Changer] ONNX Model: Official v2 like")
else:
print(
f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
)
print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert.value
@ -149,11 +142,9 @@ def _setInfoByONNX(slot: ModelSlot):
# raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"]
slot.modelType = (
EnumInferenceTypes.onnxRVC.value
if slot.f0
else EnumInferenceTypes.onnxRVCNono.value
)
print("slot.modelType1", slot.modelType)
slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
print("slot.modelType2", slot.modelType)
slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False

View File

@ -1,14 +1,12 @@
import sys
import os
from dataclasses import asdict
from dataclasses import dataclass, asdict
from typing import cast
import numpy as np
import torch
import torchaudio
from ModelSample import getModelSamples
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.SampleDownloader import downloadModelFiles
from data.ModelSlot import loadSlotInfo
from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
# avoiding parse arg error in RVC
sys.argv = ["MMVCServerSIO.py"]
@ -31,7 +29,6 @@ from voice_changer.RVC.ModelSlotGenerator import (
)
from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
@ -40,147 +37,104 @@ from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.pipeline.Pipeline import Pipeline
from Exceptions import DeviceCannotSupportHalfPrecisionException, NoModeLoadedException
from Exceptions import DeviceCannotSupportHalfPrecisionException
from const import (
RVC_MODEL_DIRNAME,
UPLOAD_DIR,
getRVCSampleJsonAndModelIds,
)
import shutil
import json
class RVC:
initialLoad: bool = True
settings: RVCSettings = RVCSettings()
# initialLoad: bool = True
# currentSlot: int = 0
# needSwitch: bool = False
pipeline: Pipeline | None = None
deviceManager = DeviceManager.get_instance()
audio_buffer: AudioInOut | None = None
prevVol: float = 0
params: VoiceChangerParams
currentSlot: int = 0
needSwitch: bool = False
def __init__(self, params: VoiceChangerParams):
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector
)
self.params = params
def __init__(self, slotIndex: int, params: VoiceChangerParams):
self.params: VoiceChangerParams = params
self.settings: RVCSettings = RVCSettings()
self.deviceManager = DeviceManager.get_instance()
EmbedderManager.initialize(params)
self.loadSlots()
print("[Voice Changer] RVC initialization: ", params)
# サンプルカタログ作成
sampleJsons: list[str] = []
sampleJsonUrls, _sampleModels = getRVCSampleJsonAndModelIds(
params.rvc_sample_mode
)
for url in sampleJsonUrls:
filename = os.path.basename(url)
sampleJsons.append(filename)
sampleModels = getModelSamples(sampleJsons, "RVC")
if sampleModels is not None:
self.settings.sampleModels = sampleModels
self.pipeline: Pipeline | None = None
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
# 起動時にスロットにモデルがある場合はロードしておく
if len(self.settings.modelSlots) > 0:
for i, slot in enumerate(self.settings.modelSlots):
if len(slot.modelFile) > 0:
self.prepareModel(i)
self.settings.modelSlotIndex = i
self.switchModel(self.settings.modelSlotIndex)
self.initialLoad = False
break
self.audio_buffer: AudioInOut | None = None
print("[Voice Changer] RVC Slot initialization. global params:", params)
self.slotIndex = slotIndex
self.slotInfo: RVCSlotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
self.prevVol = 0.0
print("[Voice Changer] RVC Slot initialization. slot info:", self.slotInfo)
def getSampleInfo(self, id: str):
sampleInfos = list(filter(lambda x: x.id == id, self.settings.sampleModels))
if len(sampleInfos) > 0:
return sampleInfos[0]
else:
None
# def loadModel(self, props: LoadModelParams):
# target_slot_idx = props.slot
# params = props.params
# slotInfo: ModelSlot = ModelSlot()
def moveToModelDir(self, file: str, dstDir: str):
dst = os.path.join(dstDir, os.path.basename(file))
if os.path.exists(dst):
os.remove(dst)
shutil.move(file, dst)
return dst
# print("loadModel", params)
# # サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
# if len(params["sampleId"]) > 0:
# sampleId = params["sampleId"]
# sampleInfo = self.getSampleInfo(sampleId)
# useIndex = params["rvcIndexDownload"]
def loadModel(self, props: LoadModelParams):
target_slot_idx = props.slot
params = props.params
slotInfo: ModelSlot = ModelSlot()
# if sampleInfo is None:
# print("[Voice Changer] sampleInfo is None")
# return
# modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
# slotInfo.modelFile = modelPath
# if indexPath is not None:
# slotInfo.indexFile = indexPath
# if iconPath is not None:
# slotInfo.iconFile = iconPath
print("loadModel", params)
# サンプルが指定されたときはダウンロードしてメタデータをでっちあげる
if len(params["sampleId"]) > 0:
sampleId = params["sampleId"]
sampleInfo = self.getSampleInfo(sampleId)
useIndex = params["rvcIndexDownload"]
# slotInfo.sampleId = sampleInfo.id
# slotInfo.credit = sampleInfo.credit
# slotInfo.description = sampleInfo.description
# slotInfo.name = sampleInfo.name
# slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
if sampleInfo is None:
print("[Voice Changer] sampleInfo is None")
return
modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
slotInfo.modelFile = modelPath
if indexPath is not None:
slotInfo.indexFile = indexPath
if iconPath is not None:
slotInfo.iconFile = iconPath
# # slotInfo.samplingRate = sampleInfo.sampleRate
# # slotInfo.modelType = sampleInfo.modelType
# # slotInfo.f0 = sampleInfo.f0
# else:
# slotInfo.modelFile = params["files"]["rvcModel"]
# slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
slotInfo.sampleId = sampleInfo.id
slotInfo.credit = sampleInfo.credit
slotInfo.description = sampleInfo.description
slotInfo.name = sampleInfo.name
slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
# slotInfo.defaultTune = params["defaultTune"]
# slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
# slotInfo.defaultProtect = params["defaultProtect"]
# slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
# slotInfo.samplingRate = sampleInfo.sampleRate
# slotInfo.modelType = sampleInfo.modelType
# slotInfo.f0 = sampleInfo.f0
else:
slotInfo.modelFile = params["files"]["rvcModel"]
slotInfo.indexFile = (
params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
)
# if slotInfo.isONNX:
# _setInfoByONNX(slotInfo)
# else:
# _setInfoByPytorch(slotInfo)
slotInfo.defaultTune = params["defaultTune"]
slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
slotInfo.defaultProtect = params["defaultProtect"]
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
# # メタデータを見て、永続化モデルフォルダに移動させる
# # その際に、メタデータのファイル格納場所も書き換える
# slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx))
# os.makedirs(slotDir, exist_ok=True)
# slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
# if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
# slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
# if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
# slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
# json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
# self.loadSlots()
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
# # 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
# if self.initialLoad:
# self.prepareModel(target_slot_idx)
# self.settings.modelSlotIndex = target_slot_idx
# self.switchModel(self.settings.modelSlotIndex)
# self.initialLoad = False
# elif target_slot_idx == self.currentSlot:
# self.prepareModel(target_slot_idx)
# メタデータを見て、永続化モデルフォルダに移動させる
# その際に、メタデータのファイル格納場所も書き換える
slotDir = os.path.join(
self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx)
)
os.makedirs(slotDir, exist_ok=True)
slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir)
if slotInfo.indexFile is not None and len(slotInfo.indexFile) > 0:
slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir)
if slotInfo.iconFile is not None and len(slotInfo.iconFile) > 0:
slotInfo.iconFile = self.moveToModelDir(slotInfo.iconFile, slotDir)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
self.loadSlots()
# 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている)
if self.initialLoad:
self.prepareModel(target_slot_idx)
self.settings.modelSlotIndex = target_slot_idx
self.switchModel(self.settings.modelSlotIndex)
self.initialLoad = False
elif target_slot_idx == self.currentSlot:
self.prepareModel(target_slot_idx)
return self.get_info()
# return self.get_info()
def loadSlots(self):
dirname = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME)
@ -189,9 +143,7 @@ class RVC:
modelSlots: list[ModelSlot] = []
for slot_idx in range(len(self.settings.modelSlots)):
slotDir = os.path.join(
self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx)
)
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx))
jsonDict = os.path.join(slotDir, "params.json")
if os.path.exists(jsonDict):
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
@ -205,76 +157,56 @@ class RVC:
if key in self.settings.intData:
# 設定前処理
val = cast(int, val)
if key == "modelSlotIndex":
if val < 0:
return True
val = val % 1000 # Quick hack for same slot is selected
if (
self.settings.modelSlots[val].modelFile is None
or self.settings.modelSlots[val].modelFile == ""
):
print("[Voice Changer] slot does not have model.")
return True
self.prepareModel(val)
# 設定
setattr(self.settings, key, val)
if key == "gpu":
self.deviceManager.setForceTensor(False)
self.prepareModel(self.settings.modelSlotIndex)
self.prepareModel()
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector
)
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector)
self.pipeline.setPitchExtractor(pitchExtractor)
else:
return False
return True
def prepareModel(self, slot: int):
if slot < 0:
print("[Voice Changer] Prepare Model of slot skip:", slot)
return self.get_info()
modelSlot = self.settings.modelSlots[slot]
print("[Voice Changer] Prepare Model of slot:", slot)
def prepareModel(self):
print("[Voice Changer] Prepare Model of slot:", self.slotIndex)
# pipelineの生成
self.next_pipeline = createPipeline(
modelSlot, self.settings.gpu, self.settings.f0Detector
)
self.pipeline = createPipeline(self.slotInfo, self.settings.gpu, self.settings.f0Detector)
# その他の設定
self.next_trans = modelSlot.defaultTune
self.next_index_ratio = modelSlot.defaultIndexRatio
self.next_protect = modelSlot.defaultProtect
self.next_samplingRate = modelSlot.samplingRate
self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
# self.needSwitch = True
print("[Voice Changer] Prepare done.")
self.switchModel(slot)
# # その他の設定
# self.next_trans = modelSlot.defaultTune
# self.next_index_ratio = modelSlot.defaultIndexRatio
# self.next_protect = modelSlot.defaultProtect
# self.next_samplingRate = modelSlot.samplingRate
# self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
# # self.needSwitch = True
# print("[Voice Changer] Prepare done.")
# self.switchModel(slot)
return self.get_info()
def switchModel(self, slot: int):
print("[Voice Changer] Switching model..")
self.pipeline = self.next_pipeline
self.settings.tran = self.next_trans
self.settings.indexRatio = self.next_index_ratio
self.settings.protect = self.next_protect
self.settings.modelSamplingRate = self.next_samplingRate
self.settings.framework = self.next_framework
# def switchModel(self, slot: int):
# print("[Voice Changer] Switching model..")
# self.pipeline = self.next_pipeline
# self.settings.tran = self.next_trans
# self.settings.indexRatio = self.next_index_ratio
# self.settings.protect = self.next_protect
# self.settings.modelSamplingRate = self.next_samplingRate
# self.settings.framework = self.next_framework
# self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない
self.currentSlot = slot
print(
"[Voice Changer] Switching model..done",
)
# # self.currentSlot = self.settings.modelSlotIndex # prepareModelから呼ばれるということはupdate_settingsの中で呼ばれるということなので、まだmodelSlotIndexは更新されていない
# self.currentSlot = slot
# print(
# "[Voice Changer] Switching model..done",
# )
def get_info(self):
data = asdict(self.settings)
@ -293,9 +225,7 @@ class RVC:
crossfadeSize: int,
solaSearchFrame: int = 0,
):
newData = (
newData.astype(np.float32) / 32768.0
) # RVCのモデルのサンプリングレートで入ってきている。extraDataLength, Crossfade等も同じSRで処理(★1)
newData = newData.astype(np.float32) / 32768.0 # RVCのモデルのサンプリングレートで入ってきている。extraDataLength, Crossfade等も同じSRで処理(★1)
if self.audio_buffer is not None:
# 過去のデータに連結
@ -303,18 +233,14 @@ class RVC:
else:
self.audio_buffer = newData
convertSize = (
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
)
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
if convertSize % 128 != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
convertSize = convertSize + (128 - (convertSize % 128))
# バッファがたまっていない場合はzeroで補う
if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate(
[np.zeros([convertSize]), self.audio_buffer]
)
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
convertOffset = -1 * convertSize
self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出
@ -324,9 +250,7 @@ class RVC:
else:
device = torch.device("cpu")
audio_buffer = torch.from_numpy(self.audio_buffer).to(
device=device, dtype=torch.float32
)
audio_buffer = torch.from_numpy(self.audio_buffer).to(device=device, dtype=torch.float32)
# 出力部分だけ切り出して音量を確認。(TODO:段階的消音にする)
cropOffset = -1 * (inputSize + crossfadeSize)
@ -339,21 +263,14 @@ class RVC:
return (audio_buffer, convertSize, vol)
def inference(self, data):
if self.settings.modelSlotIndex < 0:
print(
"[Voice Changer] wait for loading model...",
self.settings.modelSlotIndex,
self.currentSlot,
)
raise NoModeLoadedException("model_common")
# if self.needSwitch:
# if self.settings.modelSlotIndex < 0:
# print(
# f"[Voice Changer] Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}"
# "[Voice Changer] wait for loading model...",
# self.settings.modelSlotIndex,
# self.currentSlot,
# )
# self.switchModel()
# self.needSwitch = False
# raise NoModeLoadedException("model_common")
# half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu)
half = self.pipeline.isHalf
audio = data[0]
@ -363,18 +280,16 @@ class RVC:
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16)
audio = torchaudio.functional.resample(
audio, self.settings.modelSamplingRate, 16000, rolloff=0.99
)
audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99)
repeat = 3 if half else 1
repeat *= self.settings.rvcQuality # 0 or 3
sid = 0
f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio
protect = self.settings.protect
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer
useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
if_f0 = 1 if self.slotInfo.f0 else 0
embOutputLayer = self.slotInfo.embOutputLayer
useFinalProj = self.slotInfo.useFinalProj
try:
audio_out = self.pipeline.exec(
sid,
@ -382,8 +297,7 @@ class RVC:
f0_up_key,
index_rate,
if_f0,
self.settings.extraConvertSize
/ self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
self.settings.extraConvertSize / self.settings.modelSamplingRate, # extaraDataSizeの秒数。RVCのモデルのサンプリングレートで処理(★1)。
embOutputLayer,
useFinalProj,
repeat,
@ -393,9 +307,7 @@ class RVC:
return result
except DeviceCannotSupportHalfPrecisionException as e:
print(
"[Device Manager] Device cannot support half precision. Fallback to float...."
)
print("[Device Manager] Device cannot support half precision. Fallback to float....")
self.deviceManager.setForceTensor(True)
self.prepareModel(self.settings.modelSlotIndex)
raise e
@ -405,7 +317,7 @@ class RVC:
def __del__(self):
del self.pipeline
# print("---------- REMOVING ---------------")
print("---------- REMOVING ---------------")
remove_path = os.path.join("RVC")
sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
@ -461,9 +373,7 @@ class RVC:
"sampleId": "",
"files": {"rvcModel": storeFile},
}
props: LoadModelParams = LoadModelParams(
slot=targetSlot, isHalf=True, params=params
)
props: LoadModelParams = LoadModelParams(slot=targetSlot, isHalf=True, params=params)
self.loadModel(props)
self.prepareModel(targetSlot)
self.settings.modelSlotIndex = targetSlot
@ -471,12 +381,8 @@ class RVC:
def update_model_default(self):
print("[Voice Changer] UPDATE MODEL DEFAULT!!")
slotDir = os.path.join(
self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot)
)
params = json.load(
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
)
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot))
params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
params["defaultTune"] = self.settings.tran
params["defaultIndexRatio"] = self.settings.indexRatio
params["defaultProtect"] = self.settings.protect
@ -488,14 +394,10 @@ class RVC:
print("[Voice Changer] UPDATE MODEL INFO", newData)
newDataDict = json.loads(newData)
try:
slotDir = os.path.join(
self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"])
)
slotDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(newDataDict["slot"]))
except Exception as e:
print("Exception::::", e)
params = json.load(
open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
)
params = json.load(open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8"))
params[newDataDict["key"]] = newDataDict["val"]
json.dump(params, open(os.path.join(slotDir, "params.json"), "w"))
self.loadSlots()
@ -504,9 +406,7 @@ class RVC:
print("[Voice Changer] UPLOAD ASSETS", params)
paramsDict = json.loads(params)
uploadPath = os.path.join(UPLOAD_DIR, paramsDict["file"])
storeDir = os.path.join(
self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"])
)
storeDir = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME, str(paramsDict["slot"]))
storePath = os.path.join(
storeDir,
paramsDict["file"],

View File

@ -1,8 +1,4 @@
from dataclasses import dataclass, field
from ModelSample import RVCModelSample
from const import RVC_MAX_SLOT_NUM
from voice_changer.RVC.ModelSlot import ModelSlot
@dataclass
@ -17,11 +13,6 @@ class RVCSettings:
clusterInferRatio: float = 0.1
framework: str = "PyTorch" # PyTorch or ONNX
modelSlots: list[ModelSlot] = field(
default_factory=lambda: [ModelSlot() for _x in range(RVC_MAX_SLOT_NUM)]
)
sampleModels: list[RVCModelSample] = field(default_factory=lambda: [])
indexRatio: float = 0
protect: float = 0.5

View File

@ -0,0 +1,28 @@
from dataclasses import dataclass
from voice_changer.VoiceChanger import SlotInfo
@dataclass
class RVCSlotInfo(SlotInfo):
modelFile: str = ""
indexFile: str = ""
defaultTune: int = 0
defaultIndexRatio: float = 0
defaultProtect: float = 1
isONNX: bool = False
modelType: str = ""
samplingRate: int = 40000
f0: bool = True
embChannels: int = 256
embOutputLayer: int = 12
useFinalProj: bool = False
deprecated: bool = False
embedder: str = ""
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""

View File

@ -1,174 +1,52 @@
from concurrent.futures import ThreadPoolExecutor
from dataclasses import asdict
import os
from const import RVC_MODEL_DIRNAME, TMP_DIR
from Downloader import download, download_no_tqdm
from ModelSample import RVCModelSample, getModelSamples
import json
# from concurrent.futures import ThreadPoolExecutor
# from dataclasses import asdict
# import os
# from const import RVC_MODEL_DIRNAME, TMP_DIR
# from Downloader import download, download_no_tqdm
# from ModelSample import RVCModelSample, getModelSamples
# import json
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
# from voice_changer.RVC.ModelSlot import ModelSlot
# from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
def checkRvcModelExist(model_dir: str):
rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME)
if not os.path.exists(rvcModelDir):
return False
return True
# def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
# downloadParams = []
# modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
# downloadParams.append(
# {
# "url": sampleInfo.modelUrl,
# "saveTo": modelPath,
# "position": 0,
# }
# )
def downloadInitialSampleModels(
sampleJsons: list[str], sampleModelIds: list[str], model_dir: str
):
sampleModels = getModelSamples(sampleJsons, "RVC")
if sampleModels is None:
return
# indexPath = None
# if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
# print("[Voice Changer] Download sample with index.")
# indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
# downloadParams.append(
# {
# "url": sampleInfo.indexUrl,
# "saveTo": indexPath,
# "position": 1,
# }
# )
downloadParams = []
slot_count = 0
line_num = 0
for initSampleId in sampleModelIds:
# 初期サンプルをサーチ
match = False
for sample in sampleModels:
if sample.id == initSampleId[0]:
match = True
break
if match is False:
print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
continue
# iconPath = None
# if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
# iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
# downloadParams.append(
# {
# "url": sampleInfo.icon,
# "saveTo": iconPath,
# "position": 2,
# }
# )
# 検出されたら、、、
slotInfo: ModelSlot = ModelSlot()
# sampleParams: Any = {"files": {}}
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
os.makedirs(slotDir, exist_ok=True)
modelFilePath = os.path.join(
slotDir,
os.path.basename(sample.modelUrl),
)
downloadParams.append(
{
"url": sample.modelUrl,
"saveTo": modelFilePath,
"position": line_num,
}
)
slotInfo.modelFile = modelFilePath
line_num += 1
if (
initSampleId[1] is True
and hasattr(sample, "indexUrl")
and sample.indexUrl != ""
):
indexPath = os.path.join(
slotDir,
os.path.basename(sample.indexUrl),
)
downloadParams.append(
{
"url": sample.indexUrl,
"saveTo": indexPath,
"position": line_num,
}
)
slotInfo.indexFile = indexPath
line_num += 1
if hasattr(sample, "icon") and sample.icon != "":
iconPath = os.path.join(
slotDir,
os.path.basename(sample.icon),
)
downloadParams.append(
{
"url": sample.icon,
"saveTo": iconPath,
"position": line_num,
}
)
slotInfo.iconFile = iconPath
line_num += 1
slotInfo.sampleId = sample.id
slotInfo.credit = sample.credit
slotInfo.description = sample.description
slotInfo.name = sample.name
slotInfo.termsOfUseUrl = sample.termsOfUseUrl
slotInfo.defaultTune = 0
slotInfo.defaultIndexRatio = 1
slotInfo.defaultProtect = 0.5
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
# この時点ではまだファイルはダウンロードされていない
# if slotInfo.isONNX:
# _setInfoByONNX(slotInfo)
# else:
# _setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
slot_count += 1
# ダウンロード
print("[Voice Changer] Downloading model files...")
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)
# メタデータ作成
print("[Voice Changer] Generating metadata...")
for slotId in range(slot_count):
slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
slotInfo = ModelSlot(**jsonDict)
if slotInfo.isONNX:
_setInfoByONNX(slotInfo)
else:
_setInfoByPytorch(slotInfo)
json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
downloadParams = []
modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
downloadParams.append(
{
"url": sampleInfo.modelUrl,
"saveTo": modelPath,
"position": 0,
}
)
indexPath = None
if (
useIndex is True
and hasattr(sampleInfo, "indexUrl")
and sampleInfo.indexUrl != ""
):
print("[Voice Changer] Download sample with index.")
indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
downloadParams.append(
{
"url": sampleInfo.indexUrl,
"saveTo": indexPath,
"position": 1,
}
)
iconPath = None
if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
downloadParams.append(
{
"url": sampleInfo.icon,
"saveTo": iconPath,
"position": 2,
}
)
print("[Voice Changer] Downloading model files...", end="")
with ThreadPoolExecutor() as pool:
pool.map(download_no_tqdm, downloadParams)
print("")
return modelPath, indexPath, iconPath
# print("[Voice Changer] Downloading model files...", end="")
# with ThreadPoolExecutor() as pool:
# pool.map(download_no_tqdm, downloadParams)
# print("")
# return modelPath, indexPath, iconPath

View File

@ -14,7 +14,7 @@ class RVCInferencer(Inferencer):
dev = DeviceManager.get_instance().getDevice(gpu)
isHalf = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
print("LLLLLLLLLLLLLLLOOOOOOOOOOOOOOOOOOOOOOO", file)
cpt = torch.load(file, map_location="cpu")
model = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=isHalf)

View File

@ -4,7 +4,7 @@ import torch
from onnxsim import simplify
import onnx
from const import TMP_DIR, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
from data.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
SynthesizerTrnMs256NSFsid_ONNX,
@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
modelFile = modelSlot.modelFile
output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
output_file_simple = (
os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
)
output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
output_path = os.path.join(TMP_DIR, output_file)
output_path_simple = os.path.join(TMP_DIR, output_file_simple)
metadata = {
@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
if gpuMomory > 0:
_export2onnx(modelFile, output_path, output_path_simple, True, metadata)
else:
print(
"[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
)
print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
_export2onnx(modelFile, output_path, output_path_simple, False, metadata)
return output_file_simple

View File

@ -1,8 +1,8 @@
import os
import traceback
import faiss
from voice_changer.RVC.RVCSlotInfo import RVCSlotInfo
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
def createPipeline(slotInfo: RVCSlotInfo, gpu: int, f0Detector: str):
dev = DeviceManager.get_instance().getDevice(gpu)
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
# Inferencer 生成
try:
inferencer = InferencerManager.getInferencer(
modelSlot.modelType, modelSlot.modelFile, gpu
)
inferencer = InferencerManager.getInferencer(slotInfo.modelType, slotInfo.modelFile, gpu)
except Exception as e:
print("[Voice Changer] exception! loading inferencer", e)
traceback.print_exc()
@ -26,7 +24,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
# Embedder 生成
try:
embedder = EmbedderManager.getEmbedder(
modelSlot.embedder,
slotInfo.embedder,
# emmbedderFilename,
half,
dev,
@ -39,14 +37,14 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector)
# index, feature
index = _loadIndex(modelSlot)
index = _loadIndex(slotInfo)
pipeline = Pipeline(
embedder,
inferencer,
pitchExtractor,
index,
modelSlot.samplingRate,
slotInfo.samplingRate,
dev,
half,
)
@ -54,21 +52,21 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
return pipeline
def _loadIndex(modelSlot: ModelSlot):
def _loadIndex(slotInfo: RVCSlotInfo):
# Indexのロード
print("[Voice Changer] Loading index...")
# ファイル指定がない場合はNone
if modelSlot.indexFile is None:
if slotInfo.indexFile is None:
print("[Voice Changer] Index is None, not used")
return None
# ファイル指定があってもファイルがない場合はNone
if os.path.exists(modelSlot.indexFile) is not True:
if os.path.exists(slotInfo.indexFile) is not True:
return None
try:
print("Try loading...", modelSlot.indexFile)
index = faiss.read_index(modelSlot.indexFile)
print("Try loading...", slotInfo.indexFile)
index = faiss.read_index(slotInfo.indexFile)
except:
print("[Voice Changer] load index failed. Use no index.")
traceback.print_exc()

View File

@ -28,7 +28,7 @@ class DioPitchExtractor(PitchExtractor):
f0_floor=f0_min,
f0_ceil=f0_max,
channels_in_octave=2,
frame_period=10,
frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
)
f0 = pyworld.stonemask(audio.astype(np.double), _f0, t, sr)
f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame))

View File

@ -27,7 +27,7 @@ class HarvestPitchExtractor(PitchExtractor):
audio.astype(np.double),
fs=sr,
f0_ceil=f0_max,
frame_period=10,
frame_period=10, # 10ms, <= 160(window) / 16000(samplerate) * 1000(ms)
)
f0 = pyworld.stonemask(audio.astype(np.double), f0, t, sr)
f0 = signal.medfilt(f0, 3)

View File

@ -1,21 +1,21 @@
from typing import Any, Union, cast
import socketio
from const import TMP_DIR, ModelType
from const import TMP_DIR, VoiceChangerType
import torch
import os
import traceback
import numpy as np
from dataclasses import dataclass, asdict, field
import resampy
from data.ModelSlot import loadSlotInfo
from voice_changer.IORecorder import IORecorder
from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
from voice_changer.utils.VoiceChangerModel import AudioInOut
from Exceptions import (
DeviceCannotSupportHalfPrecisionException,
DeviceChangingException,
@ -26,15 +26,22 @@ from Exceptions import (
VoiceChangerIsNotSelectedException,
)
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
import threading
import time
import sounddevice as sd
import librosa
# import threading
# import time
# import sounddevice as sd
# import librosa
import json
STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
@dataclass
class SlotInfo:
voiceChangerType: VoiceChangerType | None = None
@dataclass
class VoiceChangerSettings:
inputSampleRate: int = 48000 # 48000 or 24000
@ -45,9 +52,7 @@ class VoiceChangerSettings:
recordIO: int = 0 # 0:off, 1:on
serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
serverAudioOutputDevices: list[ServerAudioDevice] = field(
default_factory=lambda: []
)
serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
enableServerAudio: int = 0 # 0:off, 1:on
serverAudioStated: int = 0 # 0:off, 1:on
@ -93,160 +98,131 @@ class VoiceChangerSettings:
class VoiceChanger:
settings: VoiceChangerSettings = VoiceChangerSettings()
voiceChanger: VoiceChangerModel | None = None
ioRecorder: IORecorder
sola_buffer: AudioInOut
namespace: socketio.AsyncNamespace | None = None
# settings: VoiceChangerSettings = VoiceChangerSettings()
# voiceChangerModel: VoiceChangerModel | None = None
#
#
# namespace: socketio.AsyncNamespace | None = None
localPerformanceShowTime = 0.0
# localPerformanceShowTime = 0.0
emitTo = None
# emitTo = None
def audio_callback(
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
):
try:
indata = indata * self.settings.serverInputAudioGain
with Timer("all_inference_time") as t:
unpackedData = librosa.to_mono(indata.T) * 32768.0
out_wav, times = self.on_request(unpackedData)
outputChunnels = outdata.shape[1]
outdata[:] = (
np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
/ 32768.0
)
outdata[:] = outdata * self.settings.serverOutputAudioGain
all_inference_time = t.secs
performance = [all_inference_time] + times
if self.emitTo is not None:
self.emitTo(performance)
self.settings.performance = [round(x * 1000) for x in performance]
except Exception as e:
print("[Voice Changer] ex:", e)
# def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
# try:
# indata = indata * self.settings.serverInputAudioGain
# with Timer("all_inference_time") as t:
# unpackedData = librosa.to_mono(indata.T) * 32768.0
# out_wav, times = self.on_request(unpackedData)
# outputChunnels = outdata.shape[1]
# outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
# outdata[:] = outdata * self.settings.serverOutputAudioGain
# all_inference_time = t.secs
# performance = [all_inference_time] + times
# if self.emitTo is not None:
# self.emitTo(performance)
# self.settings.performance = [round(x * 1000) for x in performance]
# except Exception as e:
# print("[Voice Changer] ex:", e)
def getServerAudioDevice(
self, audioDeviceList: list[ServerAudioDevice], index: int
):
serverAudioDevice = [x for x in audioDeviceList if x.index == index]
if len(serverAudioDevice) > 0:
return serverAudioDevice[0]
else:
return None
# def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
# serverAudioDevice = [x for x in audioDeviceList if x.index == index]
# if len(serverAudioDevice) > 0:
# return serverAudioDevice[0]
# else:
# return None
def serverLocal(self, _vc):
vc: VoiceChanger = _vc
# def serverLocal(self, _vc):
# vc: VoiceChanger = _vc
currentInputDeviceId = -1
currentModelSamplingRate = -1
currentOutputDeviceId = -1
currentInputChunkNum = -1
while True:
if (
vc.settings.serverAudioStated == 0
or vc.settings.serverInputDeviceId == -1
or vc.voiceChanger is None
):
vc.settings.inputSampleRate = 48000
time.sleep(2)
else:
sd._terminate()
sd._initialize()
# currentInputDeviceId = -1
# currentModelSamplingRate = -1
# currentOutputDeviceId = -1
# currentInputChunkNum = -1
# while True:
# if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
# vc.settings.inputSampleRate = 48000
# time.sleep(2)
# else:
# sd._terminate()
# sd._initialize()
sd.default.device[0] = vc.settings.serverInputDeviceId
currentInputDeviceId = vc.settings.serverInputDeviceId
sd.default.device[1] = vc.settings.serverOutputDeviceId
currentOutputDeviceId = vc.settings.serverOutputDeviceId
# sd.default.device[0] = vc.settings.serverInputDeviceId
# currentInputDeviceId = vc.settings.serverInputDeviceId
# sd.default.device[1] = vc.settings.serverOutputDeviceId
# currentOutputDeviceId = vc.settings.serverOutputDeviceId
currentInputChannelNum = vc.settings.serverAudioInputDevices
# currentInputChannelNum = vc.settings.serverAudioInputDevices
serverInputAudioDevice = self.getServerAudioDevice(
vc.settings.serverAudioInputDevices, currentInputDeviceId
)
serverOutputAudioDevice = self.getServerAudioDevice(
vc.settings.serverAudioOutputDevices, currentOutputDeviceId
)
print(serverInputAudioDevice, serverOutputAudioDevice)
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
time.sleep(2)
print("serverInputAudioDevice or serverOutputAudioDevice is None")
continue
# serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
# serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
# print(serverInputAudioDevice, serverOutputAudioDevice)
# if serverInputAudioDevice is None or serverOutputAudioDevice is None:
# time.sleep(2)
# print("serverInputAudioDevice or serverOutputAudioDevice is None")
# continue
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
# currentInputChannelNum = serverInputAudioDevice.maxInputChannels
# currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
currentInputChunkNum = vc.settings.serverReadChunkSize
block_frame = currentInputChunkNum * 128
# currentInputChunkNum = vc.settings.serverReadChunkSize
# block_frame = currentInputChunkNum * 128
# sample rate precheck(alsa cannot use 40000?)
try:
currentModelSamplingRate = (
self.voiceChanger.get_processing_sampling_rate()
)
except Exception as e:
print("[Voice Changer] ex: get_processing_sampling_rate", e)
continue
try:
with sd.Stream(
callback=self.audio_callback,
blocksize=block_frame,
samplerate=currentModelSamplingRate,
dtype="float32",
channels=[currentInputChannelNum, currentOutputChannelNum],
):
pass
vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
vc.settings.inputSampleRate = currentModelSamplingRate
print(
f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}"
)
except Exception as e:
print(
"[Voice Changer] ex: fallback to device default samplerate",
e,
)
vc.settings.serverInputAudioSampleRate = (
serverInputAudioDevice.default_samplerate
)
vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
# # sample rate precheck(alsa cannot use 40000?)
# try:
# currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
# except Exception as e:
# print("[Voice Changer] ex: get_processing_sampling_rate", e)
# continue
# try:
# with sd.Stream(
# callback=self.audio_callback,
# blocksize=block_frame,
# samplerate=currentModelSamplingRate,
# dtype="float32",
# channels=[currentInputChannelNum, currentOutputChannelNum],
# ):
# pass
# vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
# vc.settings.inputSampleRate = currentModelSamplingRate
# print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
# except Exception as e:
# print(
# "[Voice Changer] ex: fallback to device default samplerate",
# e,
# )
# vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
# vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
# main loop
try:
with sd.Stream(
callback=self.audio_callback,
blocksize=block_frame,
samplerate=vc.settings.serverInputAudioSampleRate,
dtype="float32",
channels=[currentInputChannelNum, currentOutputChannelNum],
):
while (
vc.settings.serverAudioStated == 1
and currentInputDeviceId == vc.settings.serverInputDeviceId
and currentOutputDeviceId
== vc.settings.serverOutputDeviceId
and currentModelSamplingRate
== self.voiceChanger.get_processing_sampling_rate()
and currentInputChunkNum == vc.settings.serverReadChunkSize
):
time.sleep(2)
print(
"[Voice Changer] server audio",
self.settings.performance,
)
print(
"[Voice Changer] info:",
vc.settings.serverAudioStated,
currentInputDeviceId,
currentOutputDeviceId,
vc.settings.serverInputAudioSampleRate,
currentInputChunkNum,
)
# # main loop
# try:
# with sd.Stream(
# callback=self.audio_callback,
# blocksize=block_frame,
# samplerate=vc.settings.serverInputAudioSampleRate,
# dtype="float32",
# channels=[currentInputChannelNum, currentOutputChannelNum],
# ):
# while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
# time.sleep(2)
# print(
# "[Voice Changer] server audio",
# self.settings.performance,
# )
# print(
# "[Voice Changer] info:",
# vc.settings.serverAudioStated,
# currentInputDeviceId,
# currentOutputDeviceId,
# vc.settings.serverInputAudioSampleRate,
# currentInputChunkNum,
# )
except Exception as e:
print("[Voice Changer] ex:", e)
time.sleep(2)
# except Exception as e:
# print("[Voice Changer] ex:", e)
# time.sleep(2)
def __init__(self, params: VoiceChangerParams):
def __init__(self, params: VoiceChangerParams, slotIndex: int):
# 初期化
self.settings = VoiceChangerSettings()
self.onnx_session = None
@ -255,147 +231,80 @@ class VoiceChanger:
self.currentCrossFadeOverlapSize = 0 # setting
self.crossfadeSize = 0 # calculated
self.voiceChanger = None
self.modelType: ModelType | None = None
self.voiceChangerModel = None
self.modelType: VoiceChangerType | None = None
self.params = params
self.gpu_num = torch.cuda.device_count()
self.prev_audio = np.zeros(4096)
self.mps_enabled: bool = (
getattr(torch.backends, "mps", None) is not None
and torch.backends.mps.is_available()
)
self.ioRecorder: IORecorder | None = None
self.sola_buffer: AudioInOut | None = None
audioinput, audiooutput = list_audio_device()
self.settings.serverAudioInputDevices = audioinput
self.settings.serverAudioOutputDevices = audiooutput
thread = threading.Thread(target=self.serverLocal, args=(self,))
thread.start()
print(
f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
)
def switchModelType(self, modelType: ModelType):
try:
if self.voiceChanger is not None:
# return {"status": "ERROR", "msg": "vc is already selected. currently re-select is not implemented"}
del self.voiceChanger
self.voiceChanger = None
self.modelType = modelType
if self.modelType == "MMVCv15":
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
self.voiceChanger = MMVCv15() # type: ignore
elif self.modelType == "MMVCv13":
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
self.voiceChanger = MMVCv13()
elif self.modelType == "so-vits-svc-40v2":
from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
self.voiceChanger = SoVitsSvc40v2(self.params)
elif (
self.modelType == "so-vits-svc-40"
or self.modelType == "so-vits-svc-40_c"
):
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
self.voiceChanger = SoVitsSvc40(self.params)
elif self.modelType == "DDSP-SVC":
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
self.voiceChanger = DDSP_SVC(self.params)
elif self.modelType == "RVC":
self.slotIndex = slotIndex
self.slotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
if self.slotInfo.voiceChangerType is None:
print(f"[Voice Changer] Voice Changer Type is None for slot {slotIndex} is not found.")
return
elif self.slotInfo.voiceChangerType == "RVC":
from voice_changer.RVC.RVC import RVC
self.voiceChanger = RVC(self.params)
self.voiceChangerModel = RVC(self.slotIndex, self.params)
else:
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
print(f"[Voice Changer] unknwon voice changer type. {self.slotInfo.voiceChangerType}")
self.voiceChanger = MMVCv13()
except Exception as e:
print(e)
print(traceback.format_exc())
return {"status": "OK", "msg": "vc is switched."}
# thread = threading.Thread(target=self.serverLocal, args=(self,))
# thread.start()
def getModelType(self):
if self.modelType is not None:
return {"status": "OK", "vc": self.modelType}
else:
return {"status": "OK", "vc": "none"}
def loadModel(self, props: LoadModelParams):
try:
if self.voiceChanger is None:
raise VoiceChangerIsNotSelectedException(
"Voice Changer is not selected."
)
return self.voiceChanger.loadModel(props)
except Exception as e:
print(traceback.format_exc())
print("[Voice Changer] Model Load Error! Check your model is valid.", e)
return {"status": "NG"}
def prepareModel(self):
self.voiceChangerModel.prepareModel()
def get_info(self):
data = asdict(self.settings)
if self.voiceChanger is not None:
data.update(self.voiceChanger.get_info())
if self.voiceChangerModel is not None:
data.update(self.voiceChangerModel.get_info())
devCount = torch.cuda.device_count()
gpus = []
for id in range(devCount):
name = torch.cuda.get_device_name(id)
memory = torch.cuda.get_device_properties(id).total_memory
gpu = {"id": id, "name": name, "memory": memory}
gpus.append(gpu)
data["gpus"] = gpus
return data
def get_performance(self):
return self.settings.performance
def update_settings(self, key: str, val: Any):
if self.voiceChanger is None:
if self.voiceChangerModel is None:
print("[Voice Changer] Voice Changer is not selected.")
return self.get_info()
return
if key in self.settings.intData:
setattr(self.settings, key, int(val))
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
self.crossfadeSize = 0
if key == "recordIO" and val == 1:
if hasattr(self, "ioRecorder"):
if self.ioRecorder is not None:
self.ioRecorder.close()
self.ioRecorder = IORecorder(
STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
)
self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
if key == "recordIO" and val == 0:
if hasattr(self, "ioRecorder"):
if self.ioRecorder is not None:
self.ioRecorder.close()
self.ioRecorder = None
pass
if key == "recordIO" and val == 2:
if hasattr(self, "ioRecorder"):
if self.ioRecorder is not None:
self.ioRecorder.close()
self.ioRecorder = None
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
else:
ret = self.voiceChanger.update_settings(key, val)
ret = self.voiceChangerModel.update_settings(key, val)
if ret is False:
pass
# print(f"({key} is not mutable variable or unknown variable)")
return self.get_info()
def _generate_strength(self, crossfadeSize: int):
if (
self.crossfadeSize != crossfadeSize
or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
):
if self.crossfadeSize != crossfadeSize or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
self.crossfadeSize = crossfadeSize
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
@ -424,32 +333,25 @@ class VoiceChanger:
]
)
print(
f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
)
print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")
# ひとつ前の結果とサイズが変わるため、記録は消去する。
if hasattr(self, "np_prev_audio1") is True:
delattr(self, "np_prev_audio1")
if hasattr(self, "sola_buffer") is True:
if self.sola_buffer is not None:
del self.sola_buffer
self.sola_buffer = None
# receivedData: tuple of short
def on_request(
self, receivedData: AudioInOut
) -> tuple[AudioInOut, list[Union[int, float]]]:
def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
return self.on_request_sola(receivedData)
def on_request_sola(
self, receivedData: AudioInOut
) -> tuple[AudioInOut, list[Union[int, float]]]:
def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
try:
if self.voiceChanger is None:
raise VoiceChangerIsNotSelectedException(
"Voice Changer is not selected."
)
if self.voiceChangerModel is None:
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
processing_sampling_rate = self.voiceChangerModel.get_processing_sampling_rate()
# 前処理
with Timer("pre-process") as t:
if self.settings.inputSampleRate != processing_sampling_rate:
@ -470,21 +372,17 @@ class VoiceChanger:
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
self._generate_strength(crossfade_frame)
data = self.voiceChanger.generate_input(
newData, block_frame, crossfade_frame, sola_search_frame
)
data = self.voiceChangerModel.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
preprocess_time = t.secs
# 変換処理
with Timer("main-process") as t:
# Inference
audio = self.voiceChanger.inference(data)
audio = self.voiceChangerModel.inference(data)
if hasattr(self, "sola_buffer") is True:
if self.sola_buffer is not None:
np.set_printoptions(threshold=10000)
audio_offset = -1 * (
sola_search_frame + crossfade_frame + block_frame
)
audio_offset = -1 * (sola_search_frame + crossfade_frame + block_frame)
audio = audio[audio_offset:]
# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
@ -512,10 +410,7 @@ class VoiceChanger:
print("[Voice Changer] warming up... generating sola buffer.")
result = np.zeros(4096).astype(np.int16)
if (
hasattr(self, "sola_buffer") is True
and sola_offset < sola_search_frame
):
if self.sola_buffer is not None and sola_offset < sola_search_frame:
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
end = -1 * (sola_search_frame - sola_offset)
sola_buf_org = audio[offset:end]
@ -545,9 +440,7 @@ class VoiceChanger:
else:
outputData = result
print_convert_processing(
f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
)
print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
if receivedData.shape[0] != outputData.shape[0]:
# print(
@ -564,9 +457,7 @@ class VoiceChanger:
postprocess_time = t.secs
print_convert_processing(
f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
)
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
perf = [preprocess_time, mainprocess_time, postprocess_time]
return outputData, perf
@ -586,9 +477,7 @@ class VoiceChanger:
print("[Voice Changer] embedder:", e)
return np.zeros(1).astype(np.int16), [0, 0, 0]
except VoiceChangerIsNotSelectedException:
print(
"[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc."
)
print("[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc.")
return np.zeros(1).astype(np.int16), [0, 0, 0]
except DeviceCannotSupportHalfPrecisionException:
# RVC.pyでfallback処理をするので、ここはダミーデータ返すだけ。

View File

@ -1,35 +1,105 @@
import numpy as np
import threading
from data.ModelSample import ModelSamples
from data.ModelSlot import ModelSlots, loadSlotInfo
from utils.downloader.SampleDownloader import downloadSample, getSampleInfos
from voice_changer.Local.ServerDevice import ServerDevice
from voice_changer.RVC.ModelSlotGenerator import setSlotAsRVC
from voice_changer.VoiceChanger import VoiceChanger
from const import ModelType
from voice_changer.utils.LoadModelParams import LoadModelParams
from const import MAX_SLOT_NUM, VoiceChangerType
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from dataclasses import dataclass, asdict, field
import torch
import json
@dataclass()
class GPUInfo:
id: int
name: str
memory: int
@dataclass()
class VoiceChangerManagerSettings:
slotIndex: int
intData: list[str] = field(default_factory=lambda: ["slotIndex"])
class VoiceChangerManager(object):
_instance = None
voiceChanger: VoiceChanger = None
def __init__(self, params: VoiceChangerParams):
self.voiceChanger: VoiceChanger = None
self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings(slotIndex=0)
self.params: VoiceChangerParams = params
self.serverDevice = ServerDevice()
# スタティックな情報を収集
self.sampleModels: list[ModelSamples] = getSampleInfos(self.params.sample_mode)
self.gpus: list[GPUInfo] = self._get_gpuInfos()
def _get_gpuInfos(self):
devCount = torch.cuda.device_count()
gpus = []
for id in range(devCount):
name = torch.cuda.get_device_name(id)
memory = torch.cuda.get_device_properties(id).total_memory
gpu = {"id": id, "name": name, "memory": memory}
gpus.append(gpu)
return gpus
@classmethod
def get_instance(cls, params: VoiceChangerParams):
if cls._instance is None:
cls._instance = cls()
cls._instance.voiceChanger = VoiceChanger(params)
cls._instance = cls(params)
gpu_num = torch.cuda.device_count()
mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
print(f"VoiceChanger Initialized (GPU_NUM:{gpu_num}, mps_enabled:{mps_enabled})")
cls._instance.voiceChanger = VoiceChanger(params, cls._instance.settings.slotIndex)
thread = threading.Thread(target=cls._instance.serverDevice.serverLocal, args=(cls._instance.voiceChanger,))
thread.start()
cls._instance.voiceChanger.prepareModel()
return cls._instance
def loadModel(self, props: LoadModelParams):
info = self.voiceChanger.loadModel(props)
if hasattr(info, "status") and info["status"] == "NG":
return info
def loadModel(self, slot: int, voiceChangerType: VoiceChangerType, params: str):
print(slot, voiceChangerType, params)
paramDict = json.loads(params)
if voiceChangerType == "RVC":
if "sampleId" in paramDict and len(paramDict["sampleId"]) > 0:
print("[Voice Canger]: Download RVC sample.")
downloadSample(self.params.sample_mode, paramDict["sampleId"], self.params.model_dir, slot, {"useIndex": paramDict["rvcIndexDownload"]})
else:
info["status"] = "OK"
return info
print("[Voice Canger]: Set uploaded RVC model to slot.")
setSlotAsRVC(self.params.model_dir, slot, paramDict)
return self.get_info()
def get_slotInfos(self):
slotInfos: list[ModelSlots] = []
for slotIndex in range(MAX_SLOT_NUM):
slotInfo = loadSlotInfo(self.params.model_dir, slotIndex)
slotInfos.append(slotInfo)
return slotInfos
def get_info(self):
data = asdict(self.settings)
slotInfos = self.get_slotInfos()
data["slotInfos"] = slotInfos
data["gpus"] = self.gpus
data["sampleModels"] = self.sampleModels
data["status"] = "OK"
if hasattr(self, "voiceChanger"):
info = self.voiceChanger.get_info()
info["status"] = "OK"
return info
data.update(info)
return data
else:
return {"status": "ERROR", "msg": "no model loaded"}
@ -41,12 +111,22 @@ class VoiceChangerManager(object):
return {"status": "ERROR", "msg": "no model loaded"}
def update_settings(self, key: str, val: str | int | float):
if hasattr(self, "voiceChanger"):
info = self.voiceChanger.update_settings(key, val)
info["status"] = "OK"
return info
if key in self.settings.intData:
setattr(self.settings, key, int(val))
if key == "slotIndex":
val = val % 1000 # Quick hack for same slot is selected
setattr(self.settings, key, int(val))
newVoiceChanger = VoiceChanger(self.params, self.settings.slotIndex)
newVoiceChanger.prepareModel()
self.serverDevice.serverLocal(newVoiceChanger)
del self.voiceChanger
self.voiceChanger = newVoiceChanger
elif hasattr(self, "voiceChanger"):
self.voiceChanger.update_settings(key, val)
else:
return {"status": "ERROR", "msg": "no model loaded"}
print(f"[Voice Changer] update is not handled. ({key}:{val})")
return self.get_info()
def changeVoice(self, receivedData: AudioInOut):
if hasattr(self, "voiceChanger") is True:
@ -55,12 +135,6 @@ class VoiceChangerManager(object):
print("Voice Change is not loaded. Did you load a correct model?")
return np.zeros(1).astype(np.int16), []
def switchModelType(self, modelType: ModelType):
return self.voiceChanger.switchModelType(modelType)
def getModelType(self):
return self.voiceChanger.getModelType()
def export2onnx(self):
return self.voiceChanger.export2onnx()

View File

@ -10,10 +10,3 @@ class FilePaths:
clusterTorchModelFilename: str | None
featureFilename: str | None
indexFilename: str | None
@dataclass
class LoadModelParams:
slot: int
isHalf: bool
params: Any

View File

@ -1,17 +1,11 @@
from typing import Any, Protocol, TypeAlias
import numpy as np
from voice_changer.utils.LoadModelParams import LoadModelParams
AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
class VoiceChangerModel(Protocol):
# loadModel: Callable[..., dict[str, Any]]
def loadModel(self, params: LoadModelParams):
...
def get_processing_sampling_rate(self) -> int:
...
@ -21,9 +15,7 @@ class VoiceChangerModel(Protocol):
def inference(self, data: tuple[Any, ...]) -> Any:
...
def generate_input(
self, newData: AudioInOut, inputSize: int, crossfadeSize: int
) -> tuple[Any, ...]:
def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]:
...
def update_settings(self, key: str, val: Any) -> bool:

View File

@ -11,4 +11,4 @@ class VoiceChangerParams:
hubert_base_jp: str
hubert_soft: str
nsf_hifigan: str
rvc_sample_mode: str
sample_mode: str