wip: model upload, set provider

wataru 2023-01-08 00:25:21 +09:00
parent ee910eb395
commit 216143423e
15 changed files with 3060 additions and 580 deletions


@@ -1,10 +1 @@
-<!DOCTYPE html>
-<html style="width: 100%; height: 100%; overflow: hidden">
-<head>
-    <meta charset="utf-8" />
-    <title>Voice Changer Client Demo</title>
-    <script defer src="index.js"></script></head>
-<body style="width: 100%; height: 100%; margin: 0px">
-    <div id="app" style="width: 100%; height: 100%"></div>
-</body>
-</html>
+<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>

File diff suppressed because one or more lines are too long

client/demo/dist/index.js.LICENSE.txt (vendored, new file, 2775 lines)

File diff suppressed because it is too large


@@ -1,13 +1,11 @@
 import * as React from "react";
-import { useEffect, useMemo, useRef, useState } from "react";
+import { useEffect, useMemo, useState } from "react";
 import { AUDIO_ELEMENT_FOR_PLAY_RESULT, CHROME_EXTENSION } from "./const";
-import { DefaultVoiceChangerRequestParamas, VoiceChangerOptions, VoiceChangerRequestParamas, DefaultVoiceChangerOptions, VoiceChangerMode, } from "@dannadori/voice-changer-client-js"
 import { useServerSetting } from "./101_server_setting";
 import { useDeviceSetting } from "./102_device_setting";
 import { useConvertSetting } from "./104_convert_setting";
 import { useAdvancedSetting } from "./105_advanced_setting";
 import { useSpeakerSetting } from "./103_speaker_setting";
-import { VoiceChnagerClient } from "@dannadori/voice-changer-client-js";
 import { useClient } from "./hooks/useClient";
 import { useServerControl } from "./106_server_control";
@@ -15,16 +13,21 @@ import { useServerControl } from "./106_server_control";
 export const useMicrophoneOptions = () => {
     const [audioContext, setAudioContext] = useState<AudioContext | null>(null)
-    const serverSetting = useServerSetting()
+    const clientState = useClient({
+        audioContext: audioContext,
+        audioOutputElementId: AUDIO_ELEMENT_FOR_PLAY_RESULT
+    })
+    const serverSetting = useServerSetting({
+        uploadFile: clientState.uploadFile,
+        changeOnnxExcecutionProvider: clientState.changeOnnxExcecutionProvider
+    })
     const deviceSetting = useDeviceSetting(audioContext)
     const speakerSetting = useSpeakerSetting()
     const convertSetting = useConvertSetting()
     const advancedSetting = useAdvancedSetting()
-    const clientState = useClient({
-        audioContext: audioContext,
-        audioOutputElementId: AUDIO_ELEMENT_FOR_PLAY_RESULT
-    })
     const serverControl = useServerControl({
         convertStart: async () => { await clientState.start(serverSetting.mmvcServerUrl, serverSetting.protocol) },
         convertStop: async () => { clientState.stop() },
@@ -52,6 +55,7 @@ export const useMicrophoneOptions = () => {
     }, [clientState.clientInitialized, deviceSetting.audioInput, convertSetting.bufferSize, advancedSetting.vfForceDisabled])

     // // const [options, setOptions] = useState<MicrophoneOptionsState>(InitMicrophoneOptionsState)
     // const [params, setParams] = useState<VoiceChangerRequestParamas>(DefaultVoiceChangerRequestParamas)
     // const [options, setOptions] = useState<VoiceChangerOptions>(DefaultVoiceChangerOptions)


@@ -1,7 +1,12 @@
-import { DefaultVoiceChangerOptions, OnnxExecutionProvider, Protocol, Framework, fileSelector } from "@dannadori/voice-changer-client-js"
-import React from "react"
+import { DefaultVoiceChangerOptions, OnnxExecutionProvider, Protocol, Framework, fileSelector, getInfo, loadModel } from "@dannadori/voice-changer-client-js"
+import React, { useEffect } from "react"
 import { useMemo, useState } from "react"

+export type UseServerSettingProps = {
+    uploadFile: (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => Promise<void>
+    changeOnnxExcecutionProvider: (baseUrl: string, provider: OnnxExecutionProvider) => Promise<void>
+}
+
 export type ServerSettingState = {
     serverSetting: JSX.Element;
     mmvcServerUrl: string;
@@ -13,7 +18,7 @@ export type ServerSettingState = {
     protocol: Protocol;
 }
-export const useServerSetting = (): ServerSettingState => {
+export const useServerSetting = (props: UseServerSettingProps): ServerSettingState => {
     const [mmvcServerUrl, setMmvcServerUrl] = useState<string>(DefaultVoiceChangerOptions.mmvcServerUrl)
     const [pyTorchModel, setPyTorchModel] = useState<File | null>(null)
     const [configFile, setConfigFile] = useState<File | null>(null)
@@ -65,6 +70,36 @@ export const useServerSetting = (): ServerSettingState => {
         }
         setOnnxModel(file)
     }
+    const onModelUploadClicked = async () => {
+        if (!pyTorchModel && !onnxModel) {
+            alert("Specify at least one of the PyTorch model and the ONNX model.")
+            return
+        }
+        if (!configFile) {
+            alert("A config file must be specified.")
+            return
+        }
+        if (pyTorchModel) {
+            await props.uploadFile(mmvcServerUrl, pyTorchModel, (progress: number, end: boolean) => {
+                console.log(progress, end)
+            })
+        }
+        if (onnxModel) {
+            await props.uploadFile(mmvcServerUrl, onnxModel, (progress: number, end: boolean) => {
+                console.log(progress, end)
+            })
+        }
+        await props.uploadFile(mmvcServerUrl, configFile, (progress: number, end: boolean) => {
+            console.log(progress, end)
+        })
+        const res = await getInfo(mmvcServerUrl)
+        console.log(res)
+        const res2 = await loadModel(mmvcServerUrl, configFile, pyTorchModel, onnxModel)
+        console.log(res2)
+    }
     return (
         <>
             <div className="body-row split-3-3-4 left-padding-1 guided">
@@ -103,9 +138,17 @@ export const useServerSetting = (): ServerSettingState => {
                     <div className="body-button" onClick={onOnnxFileLoadClicked}>select</div>
                 </div>
             </div>
+            <div className="body-row split-3-3-4 left-padding-1 guided">
+                <div className="body-item-title left-padding-2"></div>
+                <div className="body-item-text">
+                </div>
+                <div className="body-button-container">
+                    <div className="body-button" onClick={onModelUploadClicked}>upload</div>
+                </div>
+            </div>
         </>
     )
-    }, [pyTorchModel, configFile, onnxModel])
+    }, [pyTorchModel, configFile, onnxModel, mmvcServerUrl, props.uploadFile])

     const protocolRow = useMemo(() => {
         const onProtocolChanged = async (val: Protocol) => {
@@ -158,6 +201,7 @@ export const useServerSetting = (): ServerSettingState => {
             return
         }
         const onOnnxExecutionProviderChanged = async (val: OnnxExecutionProvider) => {
+            await props.changeOnnxExcecutionProvider(mmvcServerUrl, val)
             setOnnxExecutionProvider(val)
         }
         return (
@@ -177,8 +221,7 @@ export const useServerSetting = (): ServerSettingState => {
                 </div>
             </div>
         )
-    }, [onnxExecutionProvider, framework])
+    }, [onnxExecutionProvider, framework, mmvcServerUrl])

     const serverSetting = useMemo(() => {
         return (


@@ -1,4 +1,3 @@
-import { VoiceChangerMode } from "@dannadori/voice-changer-client-js"
 import React, { useMemo, useState } from "react"

 export type UseServerControlProps = {


@@ -1,4 +1,4 @@
-import { BufferSize, createDummyMediaStream, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VoiceChnagerClient } from "@dannadori/voice-changer-client-js"
+import { BufferSize, createDummyMediaStream, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VoiceChnagerClient, uploadLargeFile, concatUploadedFile, OnnxExecutionProvider, setOnnxExecutionProvider } from "@dannadori/voice-changer-client-js"
 import { useEffect, useMemo, useRef, useState } from "react"

 export type UseClientProps = {
@@ -17,6 +17,8 @@ export type ClientState = {
     changeInputChunkNum: (inputChunkNum: number) => void
     changeVoiceChangeMode: (voiceChangerMode: VoiceChangerMode) => void
     changeRequestParams: (params: VoiceChangerRequestParamas) => void
+    uploadFile: (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => Promise<void>
+    changeOnnxExcecutionProvider: (baseUrl: string, provider: OnnxExecutionProvider) => Promise<void>
 }

 export const useClient = (props: UseClientProps): ClientState => {
@@ -130,8 +132,19 @@ export const useClient = (props: UseClientProps): ClientState => {
         }
     }, [])

+    const uploadFile = useMemo(() => {
+        return async (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
+            const num = await uploadLargeFile(baseUrl, file, onprogress)
+            const res = await concatUploadedFile(baseUrl, file, num)
+            console.log("upload", num, res)
+        }
+    }, [])
+
+    const changeOnnxExcecutionProvider = useMemo(() => {
+        return async (baseUrl: string, provider: OnnxExecutionProvider) => {
+            setOnnxExecutionProvider(baseUrl, provider)
+        }
+    }, [])
+
     return {
         clientInitialized,
@@ -141,9 +154,11 @@ export const useClient = (props: UseClientProps): ClientState => {
         start,
         stop,
+        uploadFile,
         changeInput,
         changeInputChunkNum,
         changeVoiceChangeMode,
         changeRequestParams,
+        changeOnnxExcecutionProvider,
     }
 }
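For reference, the chunked upload that `uploadFile` performs looks like this on the wire: the file is cut into 1 MB pieces, each piece is POSTed to `/upload_file`, and `/concat_uploaded_file` then stitches the pieces back together on the server. A minimal standalone sketch, not code from this commit: the helper name `uploadInChunks` is hypothetical, the endpoints and form fields are the ones used in this diff, and the `<name>_<i>` chunk naming is inferred from the server-side `concat_file_chunks` below.

    // Hypothetical sketch of the chunked-upload flow (uploadLargeFile + concatUploadedFile).
    const uploadInChunks = async (baseUrl: string, file: File) => {
        const size = 1024 * 1024;                               // 1 MB per chunk, as in uploadLargeFile
        const chunkNum = Math.ceil(file.size / size);
        for (let i = 0; i < chunkNum; i++) {
            const chunkData = new FormData();
            chunkData.append("file", file.slice(i * size, (i + 1) * size));
            chunkData.append("filename", `${file.name}_${i}`);  // server rebuilds from <name>_<i> chunk files
            await fetch(`${baseUrl}/upload_file`, { method: "POST", body: chunkData });
        }
        const concatData = new FormData();                      // then ask the server to concatenate the chunks
        concatData.append("filename", file.name);
        concatData.append("filenameChunkNum", String(chunkNum));
        await fetch(`${baseUrl}/concat_uploaded_file`, { method: "POST", body: concatData });
    };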


@@ -38,7 +38,7 @@ export class AudioStreamer extends Duplex {
             this.socket.close()
         }
         if (this.protocol === "sio") {
-            this.socket = io(this.serverUrl);
+            this.socket = io(this.serverUrl + "/test");
             this.socket.on('connect_error', (err) => {
                 this.audioStreamerListeners.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_SIO_CONNECT_FAILED, `[SIO] connection failed ${err}`)
             })
@@ -186,11 +186,11 @@ export class AudioStreamer extends Duplex {
     }

     private sendBuffer = async (newBuffer: Uint8Array) => {
-        if (this.serverUrl.length == 0) {
-            console.warn("no server url")
-            return
-            // throw "no server url"
-        }
+        // if (this.serverUrl.length == 0) {
+        // //     console.warn("no server url")
+        // //     return
+        // //     // throw "no server url"
+        // }
         const timestamp = Date.now()
         // console.log("REQUEST_MESSAGE:", [this.gpu, this.srcId, this.dstId, timestamp, newBuffer.buffer])
         // console.log("SERVER_URL", this.serverUrl, this.protocol)
@@ -213,7 +213,7 @@ export class AudioStreamer extends Duplex {
                     newBuffer.buffer]);
             } else {
                 const res = await postVoice(
-                    this.serverUrl,
+                    this.serverUrl + "/test",
                     this.requestParamas.gpu,
                     this.requestParamas.srcId,
                     this.requestParamas.dstId,


@@ -93,7 +93,7 @@ export const DefaultVoiceChangerRequestParamas: VoiceChangerRequestParamas = {
 export const DefaultVoiceChangerOptions: VoiceChangerOptions = {
     audioInput: null,
-    mmvcServerUrl: "https://192.168.0.3:18888/test",
+    mmvcServerUrl: "",
     protocol: "sio",
     sampleRate: 48000,
     bufferSize: 1024,


@@ -1,5 +1,6 @@
+import { OnnxExecutionProvider } from "./const"
-const DEBUG = true
+const DEBUG = false
 const DEBUG_BASE_URL = "http://localhost:18888"

 type FileChunk = {
@@ -7,7 +8,31 @@ type FileChunk = {
     chunk: Blob
 }

-const uploadLargeFile = async (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
+export type ServerInfo = {
+    pyTorchModelFile: string,
+    onnxModelFile: string,
+    configFile: string,
+    providers: string[]
+}
+
+export const getInfo = async (baseUrl: string) => {
+    const getInfoURL = DEBUG ? `${DEBUG_BASE_URL}/info` : `${baseUrl}/info`
+    const info = await new Promise<ServerInfo>((resolve) => {
+        const request = new Request(getInfoURL, {
+            method: 'GET',
+        });
+        fetch(request).then(async (response) => {
+            const json = await response.json() as ServerInfo
+            resolve(json)
+        })
+    })
+    return info
+}
+
+export const uploadLargeFile = async (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
     const uploadURL = DEBUG ? `${DEBUG_BASE_URL}/upload_file` : `${baseUrl}/upload_file`
     onprogress(0, false)
     const size = 1024 * 1024;
@@ -56,23 +81,14 @@ const uploadLargeFile = async (baseUrl: string, file: File, onprogres
     return chunkNum
 }

-export const uploadModelProps = async (baseUrl: string, modelFile: File, configFile: File, onprogress: (progress: number, end: boolean) => void) => {
-    const uploadURL = DEBUG ? `${DEBUG_BASE_URL}/upload_file` : `${baseUrl}/upload_file`
-    const loadModelURL = DEBUG ? `${DEBUG_BASE_URL}/load_model` : `${baseUrl}/load_model`
-    onprogress(0, false)
-    const chunkNum = await uploadLargeFile(baseUrl, modelFile, (progress: number, _end: boolean) => {
-        onprogress(progress, false)
-    })
-    console.log("model uploaded")
-    const configP = new Promise<void>((resolve) => {
+export const concatUploadedFile = async (baseUrl: string, file: File, chunkNum: number) => {
+    const loadModelURL = DEBUG ? `${DEBUG_BASE_URL}/concat_uploaded_file` : `${baseUrl}/concat_uploaded_file`
+    new Promise<void>((resolve) => {
         const formData = new FormData();
-        formData.append("file", configFile);
-        formData.append("filename", configFile.name);
-        const request = new Request(uploadURL, {
+        formData.append("filename", file.name);
+        formData.append("filenameChunkNum", "" + chunkNum);
+        const request = new Request(loadModelURL, {
             method: 'POST',
             body: formData,
         });
@@ -81,14 +97,14 @@ export const uploadModelProps = async (baseUrl: string, modelFile: File, configF
         fetch(request).then(async (response) => {
             console.log(await response.text())
             resolve()
         })
     })
+}
-    await configP
-    console.log("config uploaded")
+
+export const loadModel = async (baseUrl: string, configFile: File, pyTorchModelFile: File | null, onnxModelFile: File | null) => {
+    const loadModelURL = DEBUG ? `${DEBUG_BASE_URL}/load_model` : `${baseUrl}/load_model`
     const loadP = new Promise<void>((resolve) => {
         const formData = new FormData();
-        formData.append("modelFilename", modelFile.name);
-        formData.append("modelFilenameChunkNum", "" + chunkNum);
+        formData.append("pyTorchModelFilename", pyTorchModelFile?.name || "-");
+        formData.append("onnxModelFilename", onnxModelFile?.name || "-");
         formData.append("configFilename", configFile.name);
         const request = new Request(loadModelURL, {
             method: 'POST',
@@ -100,8 +116,70 @@ export const uploadModelProps = async (baseUrl: string, modelFile: File, configF
             body: formData,
         });
         fetch(request).then(async (response) => {
             console.log(await response.text())
             resolve()
         })
     })
     await loadP
-    onprogress(100, true)
-    console.log("model loaded")
 }
+
+export const setOnnxExecutionProvider = async (baseUrl: string, provider: OnnxExecutionProvider) => {
+    const url = DEBUG ? `${DEBUG_BASE_URL}/set_onnx_provider` : `${baseUrl}/set_onnx_provider`
+    const loadP = new Promise<void>((resolve) => {
+        const formData = new FormData();
+        formData.append("provider", provider);
+        const request = new Request(url, {
+            method: 'POST',
+            body: formData,
+        });
+        fetch(request).then(async (response) => {
+            console.log(await response.json())
+            resolve()
+        })
+    })
+    await loadP
+}
+
+// export const uploadModelProps = async (baseUrl: string, modelFile: File, configFile: File, onprogress: (progress: number, end: boolean) => void) => {
+//     const uploadURL = DEBUG ? `${DEBUG_BASE_URL}/upload_file` : `${baseUrl}/upload_file`
+//     const loadModelURL = DEBUG ? `${DEBUG_BASE_URL}/load_model` : `${baseUrl}/load_model`
+//     onprogress(0, false)
+//     const chunkNum = await uploadLargeFile(baseUrl, modelFile, (progress: number, _end: boolean) => {
+//         onprogress(progress, false)
+//     })
+//     console.log("model uploaded")
+//     const configP = new Promise<void>((resolve) => {
+//         const formData = new FormData();
+//         formData.append("file", configFile);
+//         formData.append("filename", configFile.name);
+//         const request = new Request(uploadURL, {
+//             method: 'POST',
+//             body: formData,
+//         });
+//         fetch(request).then(async (response) => {
+//             console.log(await response.text())
+//             resolve()
+//         })
+//     })
+//     await configP
+//     console.log("config uploaded")
+//     const loadP = new Promise<void>((resolve) => {
+//         const formData = new FormData();
+//         formData.append("modelFilename", modelFile.name);
+//         formData.append("modelFilenameChunkNum", "" + chunkNum);
+//         formData.append("configFilename", configFile.name);
+//         const request = new Request(loadModelURL, {
+//             method: 'POST',
+//             body: formData,
+//         });
+//         fetch(request).then(async (response) => {
+//             console.log(await response.text())
+//             resolve()
+//         })
+//     })
+//     await loadP
+//     onprogress(100, true)
+//     console.log("model loaded")
+// }
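Taken together, the helpers exported above cover the whole model-load flow. A usage sketch, not code from this commit; `serverUrl`, `onnxModel`, and `configFile` are assumed to come from the UI:

    // Upload each file in 1 MB chunks, rebuild it server-side, then register it.
    const chunkNum = await uploadLargeFile(serverUrl, onnxModel, (p, end) => { console.log(p, end) })
    await concatUploadedFile(serverUrl, onnxModel, chunkNum)
    const configChunkNum = await uploadLargeFile(serverUrl, configFile, () => { })
    await concatUploadedFile(serverUrl, configFile, configChunkNum)
    await loadModel(serverUrl, configFile, null, onnxModel)  // "-" is sent for the absent PyTorch model
    console.log(await getInfo(serverUrl))                    // { pyTorchModelFile, onnxModelFile, configFile, providers }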


@@ -75,7 +75,7 @@ if __name__ == thisFilename or args.colab == True:
     TYPE = args.t
     PORT = args.p
     CONFIG = args.c
-    MODEL = args.m
+    MODEL = args.m if args.m != None else None
     ONNX_MODEL = args.o if args.o != None else None

     # if os.getenv("EX_TB_PORT"):
@@ -83,7 +83,7 @@ if __name__ == thisFilename or args.colab == True:
     #     exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT)
     voiceChangerManager = VoiceChangerManager.get_instance()
-    if CONFIG and MODEL:
+    if CONFIG and (MODEL or ONNX_MODEL):
         voiceChangerManager.loadModel(CONFIG, MODEL, ONNX_MODEL)
     app_fastapi = MMVC_Rest.get_instance(voiceChangerManager)
     app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager)
@@ -97,7 +97,7 @@ if __name__ == '__main__':
     TYPE = args.t
     PORT = args.p
     CONFIG = args.c
-    MODEL = args.m
+    MODEL = args.m if args.m != None else None
     ONNX_MODEL = args.o if args.o != None else None
     if TYPE != "MMVC" and TYPE != "TRAIN":
         print("Type(-t) should be MMVC or TRAIN")


@@ -17,26 +17,47 @@ class MMVC_Rest_Fileuploader:
     def __init__(self, voiceChangerManager:VoiceChangerManager):
         self.voiceChangerManager = voiceChangerManager
         self.router = APIRouter()
+        self.router.add_api_route("/info", self.get_info, methods=["GET"])
         self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
+        self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
+        self.router.add_api_route("/set_onnx_provider", self.post_set_onnx_provider, methods=["POST"])
         self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
         self.router.add_api_route("/load_model_for_train", self.post_load_model_for_train, methods=["POST"])
-        self.router.add_api_route("/extract_voices", self.post_load_model, methods=["POST"])
+        self.router.add_api_route("/extract_voices", self.post_extract_voices, methods=["POST"])
+        self.onnx_provider = ""

     def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
         return upload_file(UPLOAD_DIR, file, filename)

+    def post_concat_uploaded_file(self, filename: str = Form(...), filenameChunkNum: int = Form(...)):
+        modelFilePath = concat_file_chunks(
+            UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
+        return {"concat": f"{modelFilePath}"}
+
+    def post_set_onnx_provider(self, provider: str = Form(...)):
+        res = self.voiceChangerManager.set_onnx_provider(provider)
+        json_compatible_item_data = jsonable_encoder(res)
+        return JSONResponse(content=json_compatible_item_data)
+
+    def get_info(self):
+        info = self.voiceChangerManager.get_info()
+        json_compatible_item_data = jsonable_encoder(info)
+        return JSONResponse(content=json_compatible_item_data)
+
     def post_load_model(
             self,
-            modelFilename: str = Form(...),
-            modelFilenameChunkNum: int = Form(...),
+            pyTorchModelFilename: str = Form(...),
+            onnxModelFilename: str = Form(...),
             configFilename: str = Form(...)
     ):
-        modelFilePath = concat_file_chunks(
-            UPLOAD_DIR, modelFilename, modelFilenameChunkNum, UPLOAD_DIR)
+        pyTorchModelFilePath = os.path.join(UPLOAD_DIR, pyTorchModelFilename) if pyTorchModelFilename != "-" else None
+        onnxModelFilePath = os.path.join(UPLOAD_DIR, onnxModelFilename) if onnxModelFilename != "-" else None
         configFilePath = os.path.join(UPLOAD_DIR, configFilename)
-        self.voiceChangerManager.loadModel(configFilePath, modelFilePath)
-        return {"load": f"{modelFilePath}, {configFilePath}"}
+        self.voiceChangerManager.loadModel(configFilePath, pyTorchModelFilePath, onnxModelFilePath)
+        return {"load": f"{configFilePath}, {pyTorchModelFilePath}, {onnxModelFilePath}"}

     def post_load_model_for_train(
@@ -52,7 +73,7 @@ class MMVC_Rest_Fileuploader:
             UPLOAD_DIR, modelDFilename, modelDFilenameChunkNum, MODEL_DIR)
         return {"File saved": f"{modelGFilePath}, {modelDFilePath}"}

-    def post_load_model(
+    def post_extract_voices(
             self,
             zipFilename: str = Form(...),
             zipFileChunkNum: int = Form(...),
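The new REST surface can also be exercised directly from any client. A sketch, assuming a server built from this commit on localhost:18888; `CPUExecutionProvider` is one of the entries in the providers list of the VoiceChanger class below:

    // GET /info reports the loaded model/config files and the active ONNX providers;
    // POST /set_onnx_provider switches the execution provider at runtime.
    const base = "http://localhost:18888"
    console.log(await (await fetch(`${base}/info`)).json())

    const fd = new FormData()
    fd.append("provider", "CPUExecutionProvider")
    const res = await fetch(`${base}/set_onnx_provider`, { method: "POST", body: fd })
    console.log(await res.json())    // → {"provider": [...]}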


@@ -14,6 +14,8 @@ def upload_file(upload_dirname:str, file:UploadFile, filename: str):
 def concat_file_chunks(upload_dirname:str, filename:str, chunkNum:int, dest_dirname:str):
     target_file_name = os.path.join(dest_dirname, filename)
+    if os.path.exists(target_file_name):
+        os.unlink(target_file_name)
     with open(target_file_name, "ab") as target_file:
         for i in range(chunkNum):
             chunkName = f"{filename}_{i}"


@@ -1,8 +1,7 @@
 import torch
-import math
+import math, os, traceback
 from scipy.io.wavfile import write, read
 import numpy as np
-import traceback
 import utils
 import commons
@@ -15,30 +14,20 @@ from mel_processing import spectrogram_torch
 from text import text_to_sequence, cleaned_text_to_sequence
 import onnxruntime
-# providers = ['OpenVINOExecutionProvider',"CUDAExecutionProvider","DmlExecutionProvider", "CPUExecutionProvider"]
-providers = ['OpenVINOExecutionProvider',"CUDAExecutionProvider","DmlExecutionProvider"]
+providers = ['OpenVINOExecutionProvider',"CUDAExecutionProvider","DmlExecutionProvider","CPUExecutionProvider"]

 class VoiceChanger():
-    # def __init__(self, config, model, onnx_model=None, providers=["CPUExecutionProvider"]):
-    def __init__(self, config, model, onnx_model=None):
+    def __init__(self, config, model=None, onnx_model=None):
+        # Collect the information shared by both model types
         self.hps = utils.get_hparams_from_file(config)
-        self.net_g = SynthesizerTrn(
-            len(symbols),
-            self.hps.data.filter_length // 2 + 1,
-            self.hps.train.segment_size // self.hps.data.hop_length,
-            n_speakers=self.hps.data.n_speakers,
-            **self.hps.model)
-        self.net_g.eval()
         self.gpu_num = torch.cuda.device_count()
-        utils.load_checkpoint(model, self.net_g, None)
         text_norm = text_to_sequence("a", self.hps.data.text_cleaners)
         text_norm = commons.intersperse(text_norm, 0)
         self.text_norm = torch.LongTensor(text_norm)
         self.audio_buffer = torch.zeros(1, 0)
         self.prev_audio = np.zeros(1)
-        self.mps_enabled = getattr(
-            torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
+        self.mps_enabled = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()

         print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
@@ -46,6 +35,20 @@ class VoiceChanger():
         self.crossFadeEndRate = 0
         self.unpackedData_length = 0

+        # Build the PyTorch model
+        if model != None:
+            self.net_g = SynthesizerTrn(
+                len(symbols),
+                self.hps.data.filter_length // 2 + 1,
+                self.hps.train.segment_size // self.hps.data.hop_length,
+                n_speakers=self.hps.data.n_speakers,
+                **self.hps.model)
+            self.net_g.eval()
+            utils.load_checkpoint(model, self.net_g, None)
+        else:
+            self.net_g = None
+
+        # Build the ONNX session
         if onnx_model != None:
             ort_options = onnxruntime.SessionOptions()
             ort_options.intra_op_num_threads = 8
@@ -54,31 +57,48 @@ class VoiceChanger():
             # ort_options.inter_op_num_threads = 8
             self.onnx_session = onnxruntime.InferenceSession(
                 onnx_model,
-                # sess_options=ort_options,
-                providers=providers,
+                providers=providers
             )
-            print("ONNX_MDEOL!1", self.onnx_session.get_providers())
+            # print("ONNX_MDEOL!1", self.onnx_session.get_providers())
+            # self.onnx_session.set_providers(providers=["CPUExecutionProvider"])
+            # print("ONNX_MDEOL!1", self.onnx_session.get_providers())
+            # self.onnx_session.set_providers(providers=["DmlExecutionProvider"])
+            # print("ONNX_MDEOL!1", self.onnx_session.get_providers())
+        else:
+            self.onnx_session = None
+
+        # Record the file information
+        self.pyTorch_model_file = model
+        self.onnx_model_file = onnx_model
+        self.config_file = config

     def destroy(self):
         del self.net_g
         del self.onnx_session

+    def get_info(self):
+        print("ONNX_MODEL", self.onnx_model_file)
+        return {
+            "pyTorchModelFile": os.path.basename(self.pyTorch_model_file) if self.pyTorch_model_file != None else "",
+            "onnxModelFile": os.path.basename(self.onnx_model_file) if self.onnx_model_file != None else "",
+            "configFile": os.path.basename(self.config_file),
+            "providers": self.onnx_session.get_providers() if hasattr(self, "onnx_session") else ""
+        }
+
+    def set_onnx_provider(self, provider:str):
+        if hasattr(self, "onnx_session"):
+            self.onnx_session.set_providers(providers=[provider])
+            print("ONNX_MDEOL!1", self.onnx_session.get_providers())
+            return {"provider": self.onnx_session.get_providers()}
+
     def on_request(self, gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData):
+        # convertSize = unpackedData.shape[0] + (convertChunkNum * 128) # 128sample/1chunk
         convertSize = convertChunkNum * 128 # 128sample/1chunk
+        # print("on_request", unpackedData.shape[0], convertChunkNum* 128 )
         if unpackedData.shape[0] * 2 > convertSize:
+            # print(f"Convert sample_num = {128 * convertChunkNum} (128 * {convertChunkNum}) is less than input sample_num x2 ({unpackedData.shape[0]}) x2. Chage to {unpackedData.shape[0] * 2} samples")
             convertSize = unpackedData.shape[0] * 2
         print("convert Size", convertChunkNum, convertSize)

         if self.crossFadeOffsetRate != crossFadeOffsetRate or self.crossFadeEndRate != crossFadeEndRate or self.unpackedData_length != unpackedData.shape[0]:
             self.crossFadeOffsetRate = crossFadeOffsetRate
             self.crossFadeEndRate = crossFadeEndRate


@@ -1,5 +1,7 @@
 import numpy as np
 from voice_changer.VoiceChanger import VoiceChanger
+from fastapi.responses import JSONResponse
+from fastapi.encoders import jsonable_encoder

 class VoiceChangerManager():
     @classmethod
@@ -13,6 +15,19 @@ class VoiceChangerManager():
             self.voiceChanger.destroy()
         self.voiceChanger = VoiceChanger(config, model, onnx_model)

+    def get_info(self):
+        if hasattr(self, 'voiceChanger'):
+            return self.voiceChanger.get_info()
+        else:
+            return {"no info":"no info"}
+
+    def set_onnx_provider(self, provider:str):
+        if hasattr(self, 'voiceChanger'):
+            return self.voiceChanger.set_onnx_provider(provider)
+        else:
+            return {"error":"no voice changer"}
+
     def changeVoice(self, gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData):
         if hasattr(self, 'voiceChanger') == True:
             return self.voiceChanger.on_request(gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData)