This commit is contained in:
wataru 2023-01-08 16:18:20 +09:00
parent 8774fe1904
commit 7f5490e202
18 changed files with 646 additions and 453 deletions

View File

@ -1,10 +1,8 @@
import * as React from "react";
import { createRoot } from "react-dom/client";
import "./css/App.css"
import { useEffect, useMemo, useRef, useState } from "react";
import { useMemo, } from "react";
import { useMicrophoneOptions } from "./100_options_microphone";
import { VoiceChnagerClient, createDummyMediaStream } from "@dannadori/voice-changer-client-js"
import { AUDIO_ELEMENT_FOR_PLAY_RESULT } from "./const";
const container = document.getElementById("app")!;
const root = createRoot(container);

View File

@ -1,6 +1,6 @@
import * as React from "react";
import { useEffect, useMemo, useState } from "react";
import { AUDIO_ELEMENT_FOR_PLAY_RESULT, CHROME_EXTENSION } from "./const";
import { AUDIO_ELEMENT_FOR_PLAY_RESULT } from "./const";
import { useServerSetting } from "./101_server_setting";
import { useDeviceSetting } from "./102_device_setting";
import { useConvertSetting } from "./104_convert_setting";
@ -8,6 +8,7 @@ import { useAdvancedSetting } from "./105_advanced_setting";
import { useSpeakerSetting } from "./103_speaker_setting";
import { useClient } from "./hooks/useClient";
import { useServerControl } from "./106_server_control";
import { ServerSettingKey } from "@dannadori/voice-changer-client-js";
@ -19,8 +20,7 @@ export const useMicrophoneOptions = () => {
})
const serverSetting = useServerSetting({
uploadFile: clientState.uploadFile,
changeOnnxExcecutionProvider: clientState.changeOnnxExcecutionProvider
clientState
})
const deviceSetting = useDeviceSetting(audioContext)
const speakerSetting = useSpeakerSetting()
@ -31,6 +31,7 @@ export const useMicrophoneOptions = () => {
const serverControl = useServerControl({
convertStart: async () => { await clientState.start(serverSetting.mmvcServerUrl, serverSetting.protocol) },
convertStop: async () => { clientState.stop() },
getInfo: clientState.getInfo,
volume: clientState.volume,
bufferingTime: clientState.bufferingTime,
responseTime: clientState.responseTime
@ -47,13 +48,73 @@ export const useMicrophoneOptions = () => {
document.addEventListener('mousedown', createAudioContext);
}, [])
// 101 ServerSetting
//// server change
useEffect(() => {
console.log("input Cahngaga!")
if (!clientState.clientInitialized) return
clientState.setServerUrl(serverSetting.mmvcServerUrl)
}, [serverSetting.mmvcServerUrl])
//// protocol change
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.setProtocol(serverSetting.protocol)
}, [serverSetting.protocol])
//// framework change
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.framework, serverSetting.framework)
}, [serverSetting.framework])
//// OnnxExecutionProvider change
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.onnxExecutionProvider, serverSetting.onnxExecutionProvider)
}, [serverSetting.onnxExecutionProvider])
// 102 DeviceSetting
//// input device setup
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.changeInput(deviceSetting.audioInput, convertSetting.bufferSize, advancedSetting.vfForceDisabled)
}, [clientState.clientInitialized, deviceSetting.audioInput, convertSetting.bufferSize, advancedSetting.vfForceDisabled])
// 103 SpeakerSetting
// source and target speaker settings
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.srcId, speakerSetting.srcId)
}, [speakerSetting.srcId])
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.dstId, speakerSetting.dstId)
}, [speakerSetting.dstId])
// 104 ConvertSetting
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.setInputChunkNum(convertSetting.inputChunkNum)
}, [convertSetting.inputChunkNum])
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.convertChunkNum, convertSetting.convertChunkNum)
}, [convertSetting.convertChunkNum])
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.gpu, convertSetting.gpu)
}, [convertSetting.gpu])
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.crossFadeOffsetRate, convertSetting.crossFadeOffsetRate)
}, [convertSetting.crossFadeOffsetRate])
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.updateSettings(ServerSettingKey.crossFadeEndRate, convertSetting.crossFadeEndRate)
}, [convertSetting.crossFadeEndRate])
// 105 AdvancedSetting
useEffect(() => {
if (!clientState.clientInitialized) return
clientState.setVoiceChangerMode(advancedSetting.voiceChangerMode)
}, [advancedSetting.voiceChangerMode])
// // const [options, setOptions] = useState<MicrophoneOptionsState>(InitMicrophoneOptionsState)
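Every effect above follows the same guarded shape: wait until clientState.clientInitialized, then push the changed value to the client/server. Factored out, the pattern might look like the sketch below (useSyncedServerSetting is a hypothetical helper, not part of this codebase):
import { useEffect } from "react";
import { ServerSettingKey } from "@dannadori/voice-changer-client-js";
// Hypothetical helper: one guarded effect per server-side setting.
export const useSyncedServerSetting = (
    clientInitialized: boolean,
    updateSettings: (key: ServerSettingKey, val: string | number) => Promise<any>,
    key: ServerSettingKey,
    val: string | number
) => {
    useEffect(() => {
        if (!clientInitialized) return // client not ready yet; skip the round trip
        updateSettings(key, val)
    }, [clientInitialized, key, val])
}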

View File

@ -1,10 +1,10 @@
import { DefaultVoiceChangerOptions, OnnxExecutionProvider, Protocol, Framework, fileSelector, getInfo, loadModel } from "@dannadori/voice-changer-client-js"
import React, { useEffect } from "react"
import { DefaultVoiceChangerOptions, OnnxExecutionProvider, Protocol, Framework, fileSelector, ServerSettingKey } from "@dannadori/voice-changer-client-js"
import React from "react"
import { useMemo, useState } from "react"
import { ClientState } from "./hooks/useClient"
export type UseServerSettingProps = {
uploadFile: (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => Promise<void>
changeOnnxExcecutionProvider: (baseUrl: string, provider: OnnxExecutionProvider) => Promise<void>
clientState: ClientState
}
export type ServerSettingState = {
@ -80,24 +80,21 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
return
}
if (pyTorchModel) {
await props.uploadFile(mmvcServerUrl, pyTorchModel, (progress: number, end: boolean) => {
await props.clientState.uploadFile(pyTorchModel, (progress: number, end: boolean) => {
console.log(progress, end)
})
}
if (onnxModel) {
await props.uploadFile(mmvcServerUrl, onnxModel, (progress: number, end: boolean) => {
await props.clientState.uploadFile(onnxModel, (progress: number, end: boolean) => {
console.log(progress, end)
})
}
await props.uploadFile(mmvcServerUrl, configFile, (progress: number, end: boolean) => {
await props.clientState.uploadFile(configFile, (progress: number, end: boolean) => {
console.log(progress, end)
})
const res = await getInfo(mmvcServerUrl)
console.log(res)
const res2 = await loadModel(mmvcServerUrl, configFile, pyTorchModel, onnxModel)
console.log(res2)
await props.clientState.loadModel(configFile, pyTorchModel, onnxModel)
console.log("loaded")
}
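Condensed, the handler above is: upload whichever model files were selected, upload the config, then ask the server to load them. A sketch against the ClientState API from ./hooks/useClient (the helper name loadServerModel is illustrative):
import { ClientState } from "./hooks/useClient";
const loadServerModel = async (
    clientState: ClientState,
    configFile: File,
    pyTorchModel: File | null,
    onnxModel: File | null
) => {
    for (const f of [pyTorchModel, onnxModel]) {
        if (!f) continue
        await clientState.uploadFile(f, (progress, end) => { console.log(progress, end) })
    }
    await clientState.uploadFile(configFile, (progress, end) => { console.log(progress, end) })
    await clientState.loadModel(configFile, pyTorchModel, onnxModel) // server loads the uploaded files
}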
return (
@ -148,7 +145,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
</div>
</>
)
}, [pyTorchModel, configFile, onnxModel, mmvcServerUrl, props.uploadFile])
}, [pyTorchModel, configFile, onnxModel, mmvcServerUrl])
const protocolRow = useMemo(() => {
const onProtocolChanged = async (val: Protocol) => {
@ -201,7 +198,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
return
}
const onOnnxExecutionProviderChanged = async (val: OnnxExecutionProvider) => {
await props.changeOnnxExcecutionProvider(mmvcServerUrl, val)
setOnnxExecutionProvider(val)
}
return (

View File

@ -83,13 +83,9 @@ export const useDeviceSetting = (audioContext: AudioContext | null): DeviceSetti
useEffect(() => {
console.log("iiiiinnnppu1")
if (!audioContext) {
console.log("iiiiinnnppu2")
return
}
console.log("iiiiinnnppu3")
if (audioInputForGUI == "none") {
const ms = createDummyMediaStream(audioContext)
setAudioInput(ms)
@ -168,6 +164,7 @@ export const useDeviceSetting = (audioContext: AudioContext | null): DeviceSetti
}, [outputAudioDeviceInfo, audioOutputForGUI])
useEffect(() => {
if (audioOutputForGUI == "none") return
[AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_ORIGINAL].forEach(x => {
const audio = document.getElementById(x) as HTMLAudioElement
if (audio) {

View File

@ -88,7 +88,7 @@ export const useSpeakerSetting = () => {
</div>
)
}, [speakers])
}, [speakers, editSpeakerTargetId, editSpeakerTargetName])
const speakerSetting = useMemo(() => {

View File

@ -4,14 +4,14 @@ import React, { useMemo, useState } from "react"
export type AdvancedSettingState = {
advancedSetting: JSX.Element;
vfForceDisabled: boolean;
voiceChangeMode: VoiceChangerMode;
voiceChangerMode: VoiceChangerMode;
}
export const useAdvancedSetting = (): AdvancedSettingState => {
const [vfForceDisabled, setVfForceDisabled] = useState<boolean>(false)
const [voiceChangeMode, setVoiceChangeMode] = useState<VoiceChangerMode>("realtime")
const [voiceChangerMode, setVoiceChangerMode] = useState<VoiceChangerMode>("realtime")
const vfForceDisableRow = useMemo(() => {
return (
@ -31,7 +31,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Voice Change Mode</div>
<div className="body-select-container">
<select className="body-select" value={voiceChangeMode} onChange={(e) => { setVoiceChangeMode(e.target.value as VoiceChangerMode) }}>
<select className="body-select" value={voiceChangerMode} onChange={(e) => { setVoiceChangerMode(e.target.value as VoiceChangerMode) }}>
{
Object.values(VoiceChangerMode).map(x => {
return <option key={x} value={x}>{x}</option>
@ -60,7 +60,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
return {
advancedSetting,
vfForceDisabled,
voiceChangeMode,
voiceChangerMode,
}
}

View File

@ -1,8 +1,10 @@
import { ServerInfo } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
export type UseServerControlProps = {
convertStart: () => Promise<void>
convertStop: () => Promise<void>
getInfo: () => Promise<void>
volume: number,
bufferingTime: number,
responseTime: number
@ -53,6 +55,29 @@ export const useServerControl = (props: UseServerControlProps) => {
}, [props.volume, props.bufferingTime, props.responseTime])
const infoRow = useMemo(() => {
const onReloadClicked = async () => {
await props.getInfo() // getInfo logs the retrieved settings itself and resolves with no value
}
return (
<>
<div className="body-row split-3-1-1-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Info:</div>
<div className="body-item-text">vol(rms):{props.volume.toFixed(4)}</div>
<div className="body-item-text">buf(ms):{props.bufferingTime}</div>
<div className="body-item-text">res(ms):{props.responseTime}</div>
<div className="body-button-container">
<div className="body-button" onClick={onReloadClicked}>reload</div>
</div>
</div>
</>
)
}, [props.getInfo])
const serverControl = useMemo(() => {
return (
<>
@ -63,9 +88,10 @@ export const useServerControl = (props: UseServerControlProps) => {
</div>
{startButtonRow}
{performanceRow}
{infoRow}
</>
)
}, [startButtonRow, performanceRow])
}, [startButtonRow, performanceRow, infoRow])
return {
serverControl,

View File

@ -1,4 +1,4 @@
import { BufferSize, createDummyMediaStream, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VoiceChnagerClient, uploadLargeFile, concatUploadedFile, OnnxExecutionProvider, setOnnxExecutionProvider } from "@dannadori/voice-changer-client-js"
import { BufferSize, createDummyMediaStream, Protocol, ServerSettingKey, VoiceChangerMode, VoiceChnagerClient } from "@dannadori/voice-changer-client-js"
import { useEffect, useMemo, useRef, useState } from "react"
export type UseClientProps = {
@ -11,14 +11,26 @@ export type ClientState = {
bufferingTime: number;
responseTime: number;
volume: number;
// Client Setting
setServerUrl: (mmvcServerUrl: string) => Promise<void>
setProtocol: (protocol: Protocol) => Promise<void>
setInputChunkNum: (num: number) => Promise<void>
setVoiceChangerMode: (val: VoiceChangerMode) => Promise<void>
// Client Control
start: (mmvcServerUrl: string, protocol: Protocol) => Promise<void>;
stop: () => Promise<void>;
// Device Setting
changeInput: (audioInput: MediaStream | string, bufferSize: BufferSize, vfForceDisable: boolean) => Promise<void>
changeInputChunkNum: (inputChunkNum: number) => void
changeVoiceChangeMode: (voiceChangerMode: VoiceChangerMode) => void
changeRequestParams: (params: VoiceChangerRequestParamas) => void
uploadFile: (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => Promise<void>
changeOnnxExcecutionProvider: (baseUrl: string, provider: OnnxExecutionProvider) => Promise<void>
// Server Setting
uploadFile: (file: File, onprogress: (progress: number, end: boolean) => void) => Promise<void>
loadModel: (configFile: File, pyTorchModelFile: File | null, onnxModelFile: File | null) => Promise<void>
updateSettings: (key: ServerSettingKey, val: string | number) => Promise<any>
// Information
getInfo: () => Promise<void>
}
export const useClient = (props: UseClientProps): ClientState => {
@ -53,7 +65,7 @@ export const useClient = (props: UseClientProps): ClientState => {
})
await voiceChangerClient.isInitialized()
voiceChangerClientRef.current = voiceChangerClient
console.log("client initialized!!")
console.log("[useClient] client initialized")
setClientInitialized(true)
const audio = document.getElementById(props.audioOutputElementId) as HTMLAudioElement
@ -63,13 +75,60 @@ export const useClient = (props: UseClientProps): ClientState => {
initialized()
}, [props.audioContext])
const start = useMemo(() => {
return async (mmvcServerUrl: string, protocol: Protocol) => {
// Client Setting
const setServerUrl = useMemo(() => {
return async (mmvcServerUrl: string) => {
if (!voiceChangerClientRef.current) {
console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setServerUrl(mmvcServerUrl, protocol, true)
voiceChangerClientRef.current.setServerUrl(mmvcServerUrl, true)
voiceChangerClientRef.current.stop()
}
}, [])
const setProtocol = useMemo(() => {
return async (protocol: Protocol) => {
if (!voiceChangerClientRef.current) {
console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setProtocol(protocol)
voiceChangerClientRef.current.stop()
}
}, [])
const setInputChunkNum = useMemo(() => {
return async (num: number) => {
if (!voiceChangerClientRef.current) {
console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setInputChunkNum(num)
voiceChangerClientRef.current.stop()
}
}, [])
const setVoiceChangerMode = useMemo(() => {
return async (val: VoiceChangerMode) => {
if (!voiceChangerClientRef.current) {
console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setVoiceChangerMode(val)
voiceChangerClientRef.current.stop()
}
}, [])
// Client Control
const start = useMemo(() => {
return async (mmvcServerUrl: string) => {
if (!voiceChangerClientRef.current) {
console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setServerUrl(mmvcServerUrl, true)
voiceChangerClientRef.current.start()
}
}, [])
@ -83,82 +142,92 @@ export const useClient = (props: UseClientProps): ClientState => {
}
}, [])
// Device Setting
const changeInput = useMemo(() => {
return async (audioInput: MediaStream | string, bufferSize: BufferSize, vfForceDisable: boolean) => {
if (!voiceChangerClientRef.current || !props.audioContext) {
console.log("not initialized", voiceChangerClientRef.current, props.audioContext)
console.log("[useClient] not initialized", voiceChangerClientRef.current, props.audioContext)
return
}
if (!audioInput || audioInput == "none") {
console.log("setup! 1")
console.log("[useClient] setup!(1)", audioInput)
const ms = createDummyMediaStream(props.audioContext)
await voiceChangerClientRef.current.setup(ms, bufferSize, vfForceDisable)
} else {
console.log("setup! 2")
console.log("[useClient] setup!(2)", audioInput)
await voiceChangerClientRef.current.setup(audioInput, bufferSize, vfForceDisable)
}
}
}, [props.audioContext])
const changeInputChunkNum = useMemo(() => {
return (inputChunkNum: number) => {
if (!voiceChangerClientRef.current) {
// console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setInputChunkNum(inputChunkNum)
}
}, [])
const changeVoiceChangeMode = useMemo(() => {
return (voiceChangerMode: VoiceChangerMode) => {
if (!voiceChangerClientRef.current) {
// console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setVoiceChangerMode(voiceChangerMode)
}
}, [])
const changeRequestParams = useMemo(() => {
return (params: VoiceChangerRequestParamas) => {
if (!voiceChangerClientRef.current) {
// console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setRequestParams(params)
}
}, [])
// Server Setting
const uploadFile = useMemo(() => {
return async (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
const num = await uploadLargeFile(baseUrl, file, onprogress)
const res = await concatUploadedFile(baseUrl, file, num)
return async (file: File, onprogress: (progress: number, end: boolean) => void) => {
if (!voiceChangerClientRef.current) {
throw "[useClient] Client Not Initialized."
}
const num = await voiceChangerClientRef.current.uploadFile(file, onprogress)
const res = await voiceChangerClientRef.current.concatUploadedFile(file, num)
console.log("upload", num, res)
}
}, [])
const changeOnnxExcecutionProvider = useMemo(() => {
return async (baseUrl: string, provider: OnnxExecutionProvider) => {
setOnnxExecutionProvider(baseUrl, provider)
const loadModel = useMemo(() => {
return async (configFile: File, pyTorchModelFile: File | null, onnxModelFile: File | null) => {
if (!voiceChangerClientRef.current) {
throw "[useClient] Client Not Initialized."
}
await voiceChangerClientRef.current.loadModel(configFile, pyTorchModelFile, onnxModelFile)
console.log("load model")
}
}, [])
const updateSettings = useMemo(() => {
return async (key: ServerSettingKey, val: string | number) => {
if (!voiceChangerClientRef.current) {
throw "[useClient] Client Not Initialized."
}
return await voiceChangerClientRef.current.updateServerSettings(key, "" + val)
}
}, [])
// Information
const getInfo = useMemo(() => {
return async () => {
if (!voiceChangerClientRef.current) {
throw "[useClient] Client Not Initialized."
}
const serverSettings = await voiceChangerClientRef.current.getServerSettings()
const clientSettings = await voiceChangerClientRef.current.getClientSettings()
console.log(serverSettings, clientSettings)
}
}, [])
return {
clientInitialized,
bufferingTime,
responseTime,
volume,
setServerUrl,
setProtocol,
setInputChunkNum,
setVoiceChangerMode,
start,
stop,
uploadFile,
changeInput,
changeInputChunkNum,
changeVoiceChangeMode,
changeRequestParams,
changeOnnxExcecutionProvider,
uploadFile,
loadModel,
updateSettings,
getInfo,
}
}

View File

@ -1,7 +1,7 @@
import { io, Socket } from "socket.io-client";
import { DefaultEventsMap } from "@socket.io/component-emitter";
import { Duplex, DuplexOptions } from "readable-stream";
import { DefaultVoiceChangerRequestParamas, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
import { Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
export type Callbacks = {
onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
@ -11,6 +11,14 @@ export type AudioStreamerListeners = {
notifyResponseTime: (time: number) => void
notifyException: (code: VOICE_CHANGER_CLIENT_EXCEPTION, message: string) => void
}
export type AudioStreamerSettings = {
serverUrl: string;
protocol: Protocol;
inputChunkNum: number;
voiceChangerMode: VoiceChangerMode;
}
export class AudioStreamer extends Duplex {
private callbacks: Callbacks
private audioStreamerListeners: AudioStreamerListeners
@ -18,8 +26,7 @@ export class AudioStreamer extends Duplex {
private serverUrl = ""
private socket: Socket<DefaultEventsMap, DefaultEventsMap> | null = null
private voiceChangerMode: VoiceChangerMode = "realtime"
private requestParamas: VoiceChangerRequestParamas = DefaultVoiceChangerRequestParamas
private inputChunkNum = 10
private inputChunkNum = 128
private requestChunks: ArrayBuffer[] = []
private recordChunks: ArrayBuffer[] = []
private isRecording = false
@ -58,16 +65,15 @@ export class AudioStreamer extends Duplex {
}
// Option Change
setServerUrl = (serverUrl: string, mode: Protocol) => {
setServerUrl = (serverUrl: string) => {
this.serverUrl = serverUrl
this.protocol = mode
console.log(`[AudioStreamer] Server Setting:${this.serverUrl} ${this.protocol}`)
this.createSocketIO()// mode check is done in the method.
}
setRequestParams = (val: VoiceChangerRequestParamas) => {
this.requestParamas = val
setProtocol = (mode: Protocol) => {
this.protocol = mode
console.log(`[AudioStreamer] Server Setting:${this.serverUrl} ${this.protocol}`)
this.createSocketIO()// mode check is done in the method.
}
setInputChunkNum = (num: number) => {
@ -78,6 +84,15 @@ export class AudioStreamer extends Duplex {
this.voiceChangerMode = val
}
getSettings = (): AudioStreamerSettings => {
return {
serverUrl: this.serverUrl,
protocol: this.protocol,
inputChunkNum: this.inputChunkNum,
voiceChangerMode: this.voiceChangerMode
}
}
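With the split setters, URL and protocol can now change independently; both log the combined setting and rebuild the socket.io connection when needed. A reconfiguration sketch (the URL is a placeholder, and "sio" is assumed to be a valid Protocol value from ./const):
const reconfigureStreamer = (streamer: AudioStreamer) => {
    streamer.setServerUrl("http://localhost:18888") // placeholder URL
    streamer.setProtocol("sio")                     // assumed Protocol value
    streamer.setInputChunkNum(128)
    console.log(streamer.getSettings())             // { serverUrl, protocol, inputChunkNum, voiceChangerMode }
}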
// Main Process
//// Pipe from mic stream
@ -191,7 +206,7 @@ export class AudioStreamer extends Duplex {
const timestamp = Date.now()
// console.log("REQUEST_MESSAGE:", [this.gpu, this.srcId, this.dstId, timestamp, newBuffer.buffer])
// console.log("SERVER_URL", this.serverUrl, this.protocol)
const convertChunkNum = this.voiceChangerMode === "realtime" ? this.requestParamas.convertChunkNum : 0
// const convertChunkNum = this.voiceChangerMode === "realtime" ? this.requestParamas.convertChunkNum : 0
if (this.protocol === "sio") {
if (!this.socket) {
console.warn(`sio is not initialized`)
@ -199,26 +214,26 @@ export class AudioStreamer extends Duplex {
}
// console.log("emit!")
this.socket.emit('request_message', [
this.requestParamas.gpu,
this.requestParamas.srcId,
this.requestParamas.dstId,
// this.requestParamas.gpu,
// this.requestParamas.srcId,
// this.requestParamas.dstId,
timestamp,
convertChunkNum,
this.requestParamas.crossFadeLowerValue,
this.requestParamas.crossFadeOffsetRate,
this.requestParamas.crossFadeEndRate,
// convertChunkNum,
// this.requestParamas.crossFadeLowerValue,
// this.requestParamas.crossFadeOffsetRate,
// this.requestParamas.crossFadeEndRate,
newBuffer.buffer]);
} else {
const res = await postVoice(
this.serverUrl + "/test",
this.requestParamas.gpu,
this.requestParamas.srcId,
this.requestParamas.dstId,
// this.requestParamas.gpu,
// this.requestParamas.srcId,
// this.requestParamas.dstId,
timestamp,
convertChunkNum,
this.requestParamas.crossFadeLowerValue,
this.requestParamas.crossFadeOffsetRate,
this.requestParamas.crossFadeEndRate,
// convertChunkNum,
// this.requestParamas.crossFadeLowerValue,
// this.requestParamas.crossFadeOffsetRate,
// this.requestParamas.crossFadeEndRate,
newBuffer.buffer)
if (res.byteLength < 128 * 2) {
@ -233,24 +248,24 @@ export class AudioStreamer extends Duplex {
export const postVoice = async (
url: string,
gpu: number,
srcId: number,
dstId: number,
// gpu: number,
// srcId: number,
// dstId: number,
timestamp: number,
convertChunkNum: number,
crossFadeLowerValue: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
// convertChunkNum: number,
// crossFadeLowerValue: number,
// crossFadeOffsetRate: number,
// crossFadeEndRate: number,
buffer: ArrayBuffer) => {
const obj = {
gpu,
srcId,
dstId,
// gpu,
// srcId,
// dstId,
timestamp,
convertChunkNum,
crossFadeLowerValue,
crossFadeOffsetRate,
crossFadeEndRate,
// convertChunkNum,
// crossFadeLowerValue,
// crossFadeOffsetRate,
// crossFadeEndRate,
buffer: Buffer.from(buffer).toString('base64')
};
const body = JSON.stringify(obj);

View File

@ -0,0 +1,132 @@
import { ServerInfo, ServerSettingKey } from "./const";
type FileChunk = {
hash: number,
chunk: Blob
}
export class ServerConfigurator {
private serverUrl = ""
getSettings = async () => {
const url = this.serverUrl + "/info"
const info = await new Promise<ServerInfo>((resolve) => {
const request = new Request(url, {
method: 'GET',
});
fetch(request).then(async (response) => {
const json = await response.json() as ServerInfo
resolve(json)
})
})
return info
}
setServerUrl = (serverUrl: string) => {
this.serverUrl = serverUrl
console.log(`[ServerConfigurator] Server URL: ${this.serverUrl}`)
}
updateSettings = async (key: ServerSettingKey, val: string) => {
const url = this.serverUrl + "/update_setteings"
const p = new Promise<void>((resolve) => {
const formData = new FormData();
formData.append("key", key);
formData.append("val", val);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.json())
resolve()
})
})
const info = await p
return info
}
uploadFile = async (file: File, onprogress: (progress: number, end: boolean) => void) => {
const url = this.serverUrl + "/upload_file"
onprogress(0, false)
const size = 1024 * 1024;
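// Split the file into 1 MiB chunks; the loop below uploads up to 10 chunks in parallel per round.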
const fileChunks: FileChunk[] = [];
let index = 0; // chunk index
for (let cur = 0; cur < file.size; cur += size) {
fileChunks.push({
hash: index++,
chunk: file.slice(cur, cur + size),
});
}
const chunkNum = fileChunks.length
console.log("FILE_CHUNKS:", chunkNum, fileChunks)
while (true) {
const promises: Promise<void>[] = []
for (let i = 0; i < 10; i++) {
const chunk = fileChunks.shift()
if (!chunk) {
break
}
const p = new Promise<void>((resolve) => {
const formData = new FormData();
formData.append("file", chunk.chunk);
formData.append("filename", `${file.name}_${chunk.hash}`);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.text())
resolve()
})
})
promises.push(p)
}
await Promise.all(promises)
if (fileChunks.length == 0) {
break
}
onprogress(Math.floor(((chunkNum - fileChunks.length) / (chunkNum + 1)) * 100), false)
}
return chunkNum
}
concatUploadedFile = async (file: File, chunkNum: number) => {
const url = this.serverUrl + "/concat_uploaded_file"
await new Promise<void>((resolve) => { // await so callers don't race ahead of the concat
const formData = new FormData();
formData.append("filename", file.name);
formData.append("filenameChunkNum", "" + chunkNum);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.text())
resolve()
})
})
}
loadModel = async (configFile: File, pyTorchModelFile: File | null, onnxModelFile: File | null) => {
const url = this.serverUrl + "/load_model"
const loadP = new Promise<void>((resolve) => {
const formData = new FormData();
formData.append("pyTorchModelFilename", pyTorchModelFile?.name || "-");
formData.append("onnxModelFilename", onnxModelFile?.name || "-");
formData.append("configFilename", configFile.name);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.text())
resolve()
})
})
await loadP
}
}
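End to end, the configurator covers chunked upload, concatenation, model load, and settings. A usage sketch (URL and files are placeholders):
const demoConfigurator = async (configFile: File, onnxFile: File) => {
    const configurator = new ServerConfigurator()
    configurator.setServerUrl("http://localhost:18888") // placeholder URL
    const chunkNum = await configurator.uploadFile(onnxFile, (p, end) => { console.log(p, end) })
    await configurator.concatUploadedFile(onnxFile, chunkNum)
    await configurator.loadModel(configFile, null, onnxFile)
    console.log(await configurator.getSettings()) // ServerInfo from /info
}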

View File

@ -3,9 +3,10 @@ import { VoiceChangerWorkletNode, VolumeListener } from "./VoiceChangerWorkletNo
import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream, validateUrl } from "./util";
import { BufferSize, DefaultVoiceChangerOptions, DefaultVoiceChangerRequestParamas, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
import { BufferSize, DefaultVoiceChangerOptions, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
import MicrophoneStream from "microphone-stream";
import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
import { ServerConfigurator } from "./ServerConfigurator";
// audio data flow
@ -15,6 +16,7 @@ import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreame
export class VoiceChnagerClient {
private configurator: ServerConfigurator
private ctx: AudioContext
private vfEnable = false
private vf: VoiceFocusDeviceTransformer | null = null
@ -61,6 +63,7 @@ export class VoiceChnagerClient {
}
constructor(ctx: AudioContext, vfEnable: boolean, audioStreamerListeners: AudioStreamerListeners, volumeListener: VolumeListener) {
this.configurator = new ServerConfigurator()
this.ctx = ctx
this.vfEnable = vfEnable
this.promiseForInitialize = new Promise<void>(async (resolve) => {
@ -72,7 +75,7 @@ export class VoiceChnagerClient {
this.vcNode.connect(this.currentMediaStreamAudioDestinationNode) // vc node -> output node
// (data is pushed into the vc node by the audio streamer callback)
this.audioStreamer = new AudioStreamer(this.callbacks, audioStreamerListeners, { objectMode: true, })
this.audioStreamer.setRequestParams(DefaultVoiceChangerRequestParamas)
// this.audioStreamer.setRequestParams(DefaultVoiceChangerRequestParamas)
this.audioStreamer.setInputChunkNum(DefaultVoiceChangerOptions.inputChunkNum)
this.audioStreamer.setVoiceChangerMode(DefaultVoiceChangerOptions.voiceChangerMode)
@ -168,7 +171,7 @@ export class VoiceChnagerClient {
return this._isVoiceChanging
}
// Audio Streamer Setting
setServerUrl = (serverUrl: string, mode: Protocol, openTab: boolean = false) => {
setServerUrl = (serverUrl: string, openTab: boolean = false) => {
const url = validateUrl(serverUrl)
const pageUrl = `${location.protocol}//${location.host}`
console.log("SERVER CHECK", url, pageUrl)
@ -183,11 +186,12 @@ export class VoiceChnagerClient {
}
}
}
this.audioStreamer.setServerUrl(validateUrl(serverUrl), mode)
this.audioStreamer.setServerUrl(url)
this.configurator.setServerUrl(url)
}
setRequestParams = (val: VoiceChangerRequestParamas) => {
this.audioStreamer.setRequestParams(val)
setProtocol = (mode: Protocol) => {
this.audioStreamer.setProtocol(mode)
}
setInputChunkNum = (num: number) => {
@ -198,5 +202,28 @@ export class VoiceChnagerClient {
this.audioStreamer.setVoiceChangerMode(val)
}
// Configurator Method
uploadFile = (file: File, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile(file, onprogress)
}
concatUploadedFile = (file: File, chunkNum: number) => {
return this.configurator.concatUploadedFile(file, chunkNum)
}
loadModel = (configFile: File, pyTorchModelFile: File | null, onnxModelFile: File | null) => {
return this.configurator.loadModel(configFile, pyTorchModelFile, onnxModelFile)
}
updateServerSettings = (key: ServerSettingKey, val: string) => {
return this.configurator.updateSettings(key, val)
}
// Information
getClientSettings = () => {
return this.audioStreamer.getSettings()
}
getServerSettings = () => {
return this.configurator.getSettings()
}
}
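With this facade, transport settings route to the AudioStreamer and REST settings to the ServerConfigurator. A sketch of a configured client (the URL and the "sio" protocol value are placeholders/assumptions; construction is elided):
const demoClient = async (client: VoiceChnagerClient, configFile: File) => {
    client.setServerUrl("http://localhost:18888") // placeholder URL; openTab defaults to false
    client.setProtocol("sio")                     // assumed Protocol value
    await client.loadModel(configFile, null, null)
    await client.updateServerSettings(ServerSettingKey.gpu, "0")
    console.log(await client.getServerSettings(), client.getClientSettings())
}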

View File

@ -36,6 +36,15 @@ export type Speaker = {
"name": string,
}
export type ServerInfo = {
pyTorchModelFile: string,
onnxModelFile: string,
configFile: string,
providers: string[]
}
// Consts
export const Protocol = {
@ -80,6 +89,18 @@ export const Framework = {
}
export type Framework = typeof Framework[keyof typeof Framework]
export const ServerSettingKey = {
"srcId": "srcId",
"dstId": "dstId",
"convertChunkNum": "convertChunkNum",
"gpu": "gpu",
"crossFadeOffsetRate": "crossFadeOffsetRate",
"crossFadeEndRate": "crossFadeEndRate",
"framework": "framework",
"onnxExecutionProvider": "onnxExecutionProvider"
} as const
export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
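ServerSettingKey follows the same const-object-as-enum idiom as Protocol and Framework above: the object supplies the runtime string values and the identically named type is their union, so keys are checked at compile time. For example:
const key: ServerSettingKey = ServerSettingKey.srcId // typed as the union, valued "srcId"
// const bad: ServerSettingKey = "inputChunkNum"     // would not compile: not in the union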
// Defaults
export const DefaultVoiceChangerRequestParamas: VoiceChangerRequestParamas = {
convertChunkNum: 32, //(★1)

View File

@ -1,4 +1,3 @@
export * from "./const"
export * from "./VoiceChangerClient"
export * from "./util"
export * from "./uploader"

View File

@ -1,181 +0,0 @@
import { OnnxExecutionProvider } from "./const"
import { validateUrl } from "./util"
type FileChunk = {
hash: number,
chunk: Blob
}
export type ServerInfo = {
pyTorchModelFile: string,
onnxModelFile: string,
configFile: string,
providers: string[]
}
export const getInfo = async (baseUrl: string) => {
const url = validateUrl(baseUrl) + "/info"
const info = await new Promise<ServerInfo>((resolve) => {
const request = new Request(url, {
method: 'GET',
});
fetch(request).then(async (response) => {
const json = await response.json() as ServerInfo
resolve(json)
})
})
return info
}
export const uploadLargeFile = async (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
const url = validateUrl(baseUrl) + "/upload_file"
onprogress(0, false)
const size = 1024 * 1024;
const fileChunks: FileChunk[] = [];
let index = 0; // chunk index
for (let cur = 0; cur < file.size; cur += size) {
fileChunks.push({
hash: index++,
chunk: file.slice(cur, cur + size),
});
}
const chunkNum = fileChunks.length
console.log("FILE_CHUNKS:", chunkNum, fileChunks)
while (true) {
const promises: Promise<void>[] = []
for (let i = 0; i < 10; i++) {
const chunk = fileChunks.shift()
if (!chunk) {
break
}
const p = new Promise<void>((resolve) => {
const formData = new FormData();
formData.append("file", chunk.chunk);
formData.append("filename", `${file.name}_${chunk.hash}`);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.text())
resolve()
})
})
promises.push(p)
}
await Promise.all(promises)
if (fileChunks.length == 0) {
break
}
onprogress(Math.floor(((chunkNum - fileChunks.length) / (chunkNum + 1)) * 100), false)
}
return chunkNum
}
export const concatUploadedFile = async (baseUrl: string, file: File, chunkNum: number) => {
const url = validateUrl(baseUrl) + "/concat_uploaded_file"
new Promise<void>((resolve) => {
const formData = new FormData();
formData.append("filename", file.name);
formData.append("filenameChunkNum", "" + chunkNum);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.text())
resolve()
})
})
}
export const loadModel = async (baseUrl: string, configFile: File, pyTorchModelFile: File | null, onnxModelFile: File | null) => {
const url = validateUrl(baseUrl) + "/load_model"
const loadP = new Promise<void>((resolve) => {
const formData = new FormData();
formData.append("pyTorchModelFilename", pyTorchModelFile?.name || "-");
formData.append("onnxModelFilename", onnxModelFile?.name || "-");
formData.append("configFilename", configFile.name);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.text())
resolve()
})
})
await loadP
}
export const setOnnxExecutionProvider = async (baseUrl: string, provider: OnnxExecutionProvider) => {
const url = validateUrl(baseUrl) + "/set_onnx_provider"
const loadP = new Promise<void>((resolve) => {
const formData = new FormData();
formData.append("provider", provider);
const request = new Request(url, {
method: 'POST',
body: formData,
});
fetch(request).then(async (response) => {
console.log(await response.json())
resolve()
})
})
await loadP
}
// export const uploadModelProps = async (baseUrl: string, modelFile: File, configFile: File, onprogress: (progress: number, end: boolean) => void) => {
// const uploadURL = DEBUG ? `${DEBUG_BASE_URL}/upload_file` : `${baseUrl}/upload_file`
// const loadModelURL = DEBUG ? `${DEBUG_BASE_URL}/load_model` : `${baseUrl}/load_model`
// onprogress(0, false)
// const chunkNum = await uploadLargeFile(baseUrl, modelFile, (progress: number, _end: boolean) => {
// onprogress(progress, false)
// })
// console.log("model uploaded")
// const configP = new Promise<void>((resolve) => {
// const formData = new FormData();
// formData.append("file", configFile);
// formData.append("filename", configFile.name);
// const request = new Request(uploadURL, {
// method: 'POST',
// body: formData,
// });
// fetch(request).then(async (response) => {
// console.log(await response.text())
// resolve()
// })
// })
// await configP
// console.log("config uploaded")
// const loadP = new Promise<void>((resolve) => {
// const formData = new FormData();
// formData.append("modelFilename", modelFile.name);
// formData.append("modelFilenameChunkNum", "" + chunkNum);
// formData.append("configFilename", configFile.name);
// const request = new Request(loadModelURL, {
// method: 'POST',
// body: formData,
// });
// fetch(request).then(async (response) => {
// console.log(await response.text())
// resolve()
// })
// })
// await loadP
// onprogress(100, true)
// console.log("model loaded")
// }

View File

@ -1,5 +1,5 @@
import os,shutil
from typing import Union
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
@ -20,7 +20,7 @@ class MMVC_Rest_Fileuploader:
self.router.add_api_route("/info", self.get_info, methods=["GET"])
self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
self.router.add_api_route("/set_onnx_provider", self.post_set_onnx_provider, methods=["POST"])
self.router.add_api_route("/update_setteings",self.post_update_setteings, methods=["POST"])
self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
self.router.add_api_route("/load_model_for_train", self.post_load_model_for_train, methods=["POST"])
self.router.add_api_route("/extract_voices", self.post_extract_voices, methods=["POST"])
@ -35,16 +35,17 @@ class MMVC_Rest_Fileuploader:
UPLOAD_DIR, filename, filenameChunkNum, UPLOAD_DIR)
return {"concat": f"{modelFilePath}"}
def post_set_onnx_provider(self, provider: str = Form(...)):
res = self.voiceChangerManager.set_onnx_provider(provider)
json_compatible_item_data = jsonable_encoder(res)
return JSONResponse(content=json_compatible_item_data)
def get_info(self):
info = self.voiceChangerManager.get_info()
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
def post_update_setteings(self, key:str=Form(...), val:Union[int, str, float]=Form(...)):
print("post_update_setteings", key, val)
info = self.voiceChangerManager.update_setteings(key, val)
json_compatible_item_data = jsonable_encoder(info)
return JSONResponse(content=json_compatible_item_data)
def post_load_model(
self,
pyTorchModelFilename: str = Form(...),

View File

@ -30,15 +30,14 @@ class MMVC_Namespace(socketio.AsyncNamespace):
crossFadeOffsetRate = float(msg[6])
crossFadeEndRate = float(msg[7])
data = msg[8]
# print(srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate)
unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
audio1 = self.voiceChangerManager.changeVoice(
gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData)
# print("sio result:", len(audio1), audio1.shape)
bin = struct.pack('<%sh' % len(audio1), *audio1)
await self.emit('response', [timestamp, bin])
# bin = struct.pack('<%sh' % len(audio1), *audio1)
# await self.emit('response', [timestamp, bin])
def on_disconnect(self, sid):
# print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

View File

@ -2,7 +2,7 @@ import torch
import math, os, traceback
from scipy.io.wavfile import write, read
import numpy as np
from dataclasses import dataclass, asdict
import utils
import commons
from models import SynthesizerTrn
@ -16,8 +16,29 @@ import onnxruntime
providers = ['OpenVINOExecutionProvider',"CUDAExecutionProvider","DmlExecutionProvider","CPUExecutionProvider"]
@dataclass
class VocieChangerSettings():
gpu:int = 0
srcId:int = 107
dstId:int = 100
crossFadeOffsetRate:float = 0.1
crossFadeEndRate:float = 0.9
convertChunkNum:int = 32
framework:str = "PyTorch"
pyTorch_model_file:str = ""
onnx_model_file:str = ""
config_file:str = ""
# only the mutable fields are listed below
intData = ["srcId", "dstId", "convertChunkNum"]
floatData = ["gpu", "crossFadeOffsetRate", "crossFadeEndRate",]
strData = ["framework"]
class VoiceChanger():
def __init__(self, config:str, model:str=None, onnx_model:str=None):
def __init__(self, config:str, pyTorch_model_file:str=None, onnx_model_file:str=None):
# initialization
self.settings = VocieChangerSettings(config_file=config, pyTorch_model_file=pyTorch_model_file, onnx_model_file=onnx_model_file)
self.unpackedData_length=0
# gather commonly used information
self.hps = utils.get_hparams_from_file(config)
self.gpu_num = torch.cuda.device_count()
@ -31,12 +52,8 @@ class VoiceChanger():
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
self.crossFadeOffsetRate = 0
self.crossFadeEndRate = 0
self.unpackedData_length = 0
# create the PyTorch model
if model != None:
if pyTorch_model_file != None:
self.net_g = SynthesizerTrn(
len(symbols),
self.hps.data.filter_length // 2 + 1,
@ -44,19 +61,19 @@ class VoiceChanger():
n_speakers=self.hps.data.n_speakers,
**self.hps.model)
self.net_g.eval()
utils.load_checkpoint(model, self.net_g, None)
utils.load_checkpoint(pyTorch_model_file, self.net_g, None)
else:
self.net_g = None
# create the ONNX session
if onnx_model != None:
if onnx_model_file != None:
ort_options = onnxruntime.SessionOptions()
ort_options.intra_op_num_threads = 8
# ort_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
# ort_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
# ort_options.inter_op_num_threads = 8
self.onnx_session = onnxruntime.InferenceSession(
onnx_model,
onnx_model_file,
providers=providers
)
# print("ONNX_MDEOL!1", self.onnx_session.get_providers())
@ -67,42 +84,58 @@ class VoiceChanger():
else:
self.onnx_session = None
# record file info
self.pyTorch_model_file = model
self.onnx_model_file = onnx_model
self.config_file = config
def destroy(self):
del self.net_g
del self.onnx_session
def get_info(self):
print("ONNX_MODEL",self.onnx_model_file)
return {
"pyTorchModelFile":os.path.basename(self.pyTorch_model_file)if self.pyTorch_model_file!=None else "",
"onnxModelFile":os.path.basename(self.onnx_model_file)if self.onnx_model_file!=None else "",
"configFile":os.path.basename(self.config_file),
"providers":self.onnx_session.get_providers() if hasattr(self, "onnx_session") else ""
}
data = asdict(self.settings)
data["providers"] = self.onnx_session.get_providers() if hasattr(self, "onnx_session") else ""
files = ["config_file", "pyTorch_model_file", "onnx_model_file"]
for f in files:
data[f] = os.path.basename(data[f]) if data[f] != None else ""  # model files may be None
return data
def set_onnx_provider(self, provider:str):
if hasattr(self, "onnx_session"):
self.onnx_session.set_providers(providers=[provider])
print("ONNX_MDEOL: ", self.onnx_session.get_providers())
return {"provider":self.onnx_session.get_providers()}
def update_setteings(self, key:str, val:any):
if key == "onnxExecutionProvider":
self.onnx_session.set_providers(providers=[val])
return self.get_info()
elif key in self.settings.intData:
setattr(self.settings, key, int(val))
return self.get_info()
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
return self.get_info()
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
return self.get_info()
else:
return {"provider":""}
print(f"{key} is not mutalbe variable!")
return self.get_info()
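From the browser this dispatch is reached through the /update_setteings route (the spelling matches the client code). A minimal fetch sketch in the client's TypeScript (server URL is a placeholder):
const updateServerSetting = async (key: string, val: string) => {
    const formData = new FormData()
    formData.append("key", key)
    formData.append("val", val)
    const request = new Request("http://localhost:18888/update_setteings", { method: "POST", body: formData }) // placeholder URL
    const res = await fetch(request)
    console.log(await res.json()) // the server responds with its full info dict
}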
# def set_gpu(self, gpu:int):
# self.settings.gpu = gpu
# return {"gpu":self.settings.gpu}
def _generate_strength(self, crossFadeOffsetRate:float, crossFadeEndRate:float, unpackedData):
# def set_crossfade_setting(self, crossFadeOffsetRate:float, crossFadeEndRate:float):
# self.settings.crossFadeOffsetRate = crossFadeOffsetRate
# self.settings.crossFadeEndRate = crossFadeEndRate
# self.unpackedData_length = 0 # to force the strengths to be recalculated on the next conversion
if self.crossFadeOffsetRate != crossFadeOffsetRate or self.crossFadeEndRate != crossFadeEndRate or self.unpackedData_length != unpackedData.shape[0]:
self.crossFadeOffsetRate = crossFadeOffsetRate
self.crossFadeEndRate = crossFadeEndRate
# def set_conversion_setting(self, srcId:int, dstId:int):
# self.settings.srcId = srcId
# self.settings.dstId = dstId
# def set_convert_chunk_num(self, convertChunkNum):
# self.settings.convertChunkNum = convertChunkNum
def _generate_strength(self, unpackedData):
if self.unpackedData_length != unpackedData.shape[0]:
self.unpackedData_length = unpackedData.shape[0]
cf_offset = int(unpackedData.shape[0] * crossFadeOffsetRate)
cf_end = int(unpackedData.shape[0] * crossFadeEndRate)
cf_offset = int(unpackedData.shape[0] * self.settings.crossFadeOffsetRate)
cf_end = int(unpackedData.shape[0] * self.settings.crossFadeEndRate)
cf_range = cf_end - cf_offset
percent = np.arange(cf_range) / cf_range
@ -115,7 +148,7 @@ class VoiceChanger():
self.prev_strength = torch.FloatTensor(self.np_prev_strength)
self.cur_strength = torch.FloatTensor(self.np_cur_strength)
torch.set_printoptions(edgeitems=2100)
# torch.set_printoptions(edgeitems=2100)
print("Generated Strengths")
# print(f"cross fade: start:{cf_offset} end:{cf_end} range:{cf_range}")
# print(f"target_len:{unpackedData.shape[0]}, prev_len:{len(self.prev_strength)} cur_len:{len(self.cur_strength)}")
@ -126,7 +159,7 @@ class VoiceChanger():
if hasattr(self, 'prev_audio1') == True:
delattr(self,"prev_audio1")
def _generate_input(self, unpackedData, convertSize, srcId):
def _generate_input(self, unpackedData:any, convertSize:int):
# shape the incoming data for this conversion into tensors
audio = torch.FloatTensor(unpackedData.astype(np.float32)) # create a float32 tensor
audio_norm = audio / self.hps.data.max_wav_value # normalize
@ -139,119 +172,113 @@ class VoiceChanger():
self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
center=False)
spec = torch.squeeze(spec, 0)
sid = torch.LongTensor([int(srcId)])
sid = torch.LongTensor([int(self.settings.srcId)])
data = (self.text_norm, spec, audio_norm, sid)
data = TextAudioSpeakerCollate()([data])
return data
def on_request(self, gpu:int, srcId:int, dstId:int, timestamp:int, convertChunkNum:int, crossFadeLowerValue:float, crossFadeOffsetRate:float, crossFadeEndRate:float, unpackedData:any):
convertSize = convertChunkNum * 128 # 128sample/1chunk
def on_request(self, unpackedData:any):
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
if unpackedData.shape[0] * 2 > convertSize:
convertSize = unpackedData.shape[0] * 2
# print("convert Size", convertChunkNum, convertSize)
self._generate_strength(crossFadeOffsetRate, crossFadeEndRate, unpackedData)
data = self. _generate_input(unpackedData, convertSize, srcId)
self._generate_strength(unpackedData)
data = self._generate_input(unpackedData, convertSize)
try:
# if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled):
if gpu == -2 and hasattr(self, 'onnx_session') == True:
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
sid_tgt1 = torch.LongTensor([dstId])
# if spec.size()[2] >= 8:
audio1 = self.onnx_session.run(
["audio"],
{
"specs": spec.numpy(),
"lengths": spec_lengths.numpy(),
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy()
})[0][0,0] * self.hps.data.max_wav_value
if hasattr(self, 'np_prev_audio1') == True:
prev = self.np_prev_audio1[-1*unpackedData.shape[0]:]
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape)
powered_prev = prev * self.np_prev_strength
powered_cur = cur * self.np_cur_strength
result = powered_prev + powered_cur
#result = prev * self.np_prev_strength + cur * self.np_cur_strength
else:
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = cur
self.np_prev_audio1 = audio1
# try:
# # if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled):
# if self.gpu == -2 and hasattr(self, 'onnx_session') == True:
# x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
# sid_tgt1 = torch.LongTensor([self.dstId])
# # if spec.size()[2] >= 8:
# audio1 = self.onnx_session.run(
# ["audio"],
# {
# "specs": spec.numpy(),
# "lengths": spec_lengths.numpy(),
# "sid_src": sid_src.numpy(),
# "sid_tgt": sid_tgt1.numpy()
# })[0][0,0] * self.hps.data.max_wav_value
# if hasattr(self, 'np_prev_audio1') == True:
# prev = self.np_prev_audio1[-1*unpackedData.shape[0]:]
# cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# # print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape)
# powered_prev = prev * self.np_prev_strength
# powered_cur = cur * self.np_cur_strength
# result = powered_prev + powered_cur
# #result = prev * self.np_prev_strength + cur * self.np_cur_strength
# else:
# cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# result = cur
# self.np_prev_audio1 = audio1
elif gpu < 0 or self.gpu_num == 0:
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
x.cpu() for x in data]
sid_tgt1 = torch.LongTensor([dstId]).cpu()
audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value)
if self.prev_strength.device != torch.device('cpu'):
print(f"prev_strength move from {self.prev_strength.device} to cpu")
self.prev_strength = self.prev_strength.cpu()
if self.cur_strength.device != torch.device('cpu'):
print(f"cur_strength move from {self.cur_strength.device} to cpu")
self.cur_strength = self.cur_strength.cpu()
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'):
prev = self.prev_audio1[-1*unpackedData.shape[0]:]
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = prev * self.prev_strength + cur * self.cur_strength
else:
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = cur
self.prev_audio1 = audio1
result = result.cpu().float().numpy()
# elif self.mps_enabled == True: # MPS doesn't support aten::weight_norm_interface, and PYTORCH_ENABLE_MPS_FALLBACK=1 causes a big delay.
# elif self.gpu < 0 or self.gpu_num == 0:
# with torch.no_grad():
# x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
# x.to("mps") for x in data]
# sid_tgt1 = torch.LongTensor([dstId]).to("mps")
# audio1 = (self.net_g.to("mps").voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
# 0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
# x.cpu() for x in data]
# sid_tgt1 = torch.LongTensor([self.dstId]).cpu()
# audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value)
else:
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cuda(gpu) for x in data]
sid_tgt1 = torch.LongTensor([dstId]).cuda(gpu)
# audio1 = (self.net_g.cuda(gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
audio1 = self.net_g.cuda(gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value
# if self.prev_strength.device != torch.device('cpu'):
# print(f"prev_strength move from {self.prev_strength.device} to cpu")
# self.prev_strength = self.prev_strength.cpu()
# if self.cur_strength.device != torch.device('cpu'):
# print(f"cur_strength move from {self.cur_strength.device} to cpu")
# self.cur_strength = self.cur_strength.cpu()
if self.prev_strength.device != torch.device('cuda', gpu):
print(f"prev_strength move from {self.prev_strength.device} to gpu{gpu}")
self.prev_strength = self.prev_strength.cuda(gpu)
if self.cur_strength.device != torch.device('cuda', gpu):
print(f"cur_strength move from {self.cur_strength.device} to gpu{gpu}")
self.cur_strength = self.cur_strength.cuda(gpu)
# if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'):
# prev = self.prev_audio1[-1*unpackedData.shape[0]:]
# cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# result = prev * self.prev_strength + cur * self.cur_strength
# else:
# cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# result = cur
# self.prev_audio1 = audio1
# result = result.cpu().float().numpy()
# else:
# with torch.no_grad():
# x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cuda(self.gpu) for x in data]
# sid_tgt1 = torch.LongTensor([self.dstId]).cuda(self.gpu)
# audio1 = self.net_g.cuda(self.gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value
# if self.prev_strength.device != torch.device('cuda', self.gpu):
# print(f"prev_strength move from {self.prev_strength.device} to gpu{self.gpu}")
# self.prev_strength = self.prev_strength.cuda(self.gpu)
# if self.cur_strength.device != torch.device('cuda', self.gpu):
# print(f"cur_strength move from {self.cur_strength.device} to gpu{self.gpu}")
# self.cur_strength = self.cur_strength.cuda(self.gpu)
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', gpu):
prev = self.prev_audio1[-1*unpackedData.shape[0]:]
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = prev * self.prev_strength + cur * self.cur_strength
# print("merging...", prev.shape, cur.shape)
else:
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = cur
# print("no merging...", cur.shape)
self.prev_audio1 = audio1
# if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.gpu):
# prev = self.prev_audio1[-1*unpackedData.shape[0]:]
# cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# result = prev * self.prev_strength + cur * self.cur_strength
# # print("merging...", prev.shape, cur.shape)
# else:
# cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# result = cur
# # print("no merging...", cur.shape)
# self.prev_audio1 = audio1
#print(result)
result = result.cpu().float().numpy()
# #print(result)
# result = result.cpu().float().numpy()
except Exception as e:
print("VC PROCESSING!!!! EXCEPTION!!!", e)
print(traceback.format_exc())
del self.np_prev_audio1
del self.prev_audio1
# except Exception as e:
# print("VC PROCESSING!!!! EXCEPTION!!!", e)
# print(traceback.format_exc())
# del self.np_prev_audio1
# del self.prev_audio1
result = result.astype(np.int16)
# print("on_request result size:",result.shape)
return result
# result = result.astype(np.int16)
# # print("on_request result size:",result.shape)
# return result
return

View File

@ -21,16 +21,22 @@ class VoiceChangerManager():
else:
return {"no info":"no info"}
def set_onnx_provider(self, provider:str):
def update_setteings(self, key:str, val:any):
if hasattr(self, 'voiceChanger'):
return self.voiceChanger.set_onnx_provider(provider)
return self.voiceChanger.update_setteings(key, val)
else:
return {"error":"no voice changer"}
return {"no info":"no info"}
# def set_onnx_provider(self, provider:str):
# if hasattr(self, 'voiceChanger'):
# return self.voiceChanger.set_onnx_provider(provider)
# else:
# return {"error":"no voice changer"}
def changeVoice(self, gpu:int, srcId:int, dstId:int, timestamp:int, convertChunkNum:int, crossFadeLowerValue:float, crossFadeOffsetRate:float, crossFadeEndRate:float, unpackedData:any):
if hasattr(self, 'voiceChanger') == True:
return self.voiceChanger.on_request(gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData)
return self.voiceChanger.on_request(unpackedData)
else:
print("Voice Change is not loaded. Did you load a correct model?")
return np.zeros(1).astype(np.int16)