WIP refactoring

This commit is contained in:
wataru 2023-01-07 20:07:39 +09:00
parent 32e21b1a7a
commit ee910eb395
33 changed files with 1972 additions and 3977 deletions

View File

@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

File diff suppressed because it is too large Load Diff

View File

@ -48,7 +48,7 @@
"webpack-dev-server": "^4.11.1"
},
"dependencies": {
"@dannadori/voice-changer-client-js": "^1.0.3",
"@dannadori/voice-changer-client-js": "file:../lib",
"react": "^18.2.0",
"react-dom": "^18.2.0"
}

View File

@ -0,0 +1,58 @@
import * as React from "react";
import { createRoot } from "react-dom/client";
import "./css/App.css"
import { useEffect, useMemo, useRef, useState } from "react";
import { useMicrophoneOptions } from "./100_options_microphone";
import { VoiceChnagerClient, createDummyMediaStream } from "@dannadori/voice-changer-client-js"
import { AUDIO_ELEMENT_FOR_PLAY_RESULT } from "./const";
// Mount point for the React tree. index.html always ships <div id="app">,
// so the lookup result is asserted to be an element instead of null-checked.
const container = document.getElementById("app") as HTMLElement;
const root = createRoot(container);
/**
 * Top-level component of the demo page.
 *
 * Renders the page title, a "clear setting" button and the setting panel
 * produced by `useMicrophoneOptions`.
 */
const App = () => {
    const { voiceChangerSetting } = useMicrophoneOptions()

    /**
     * Clears the persisted settings and reloads the page.
     *
     * `chrome.storage` is only present when the page runs as a Chrome
     * extension. Referencing the bare `chrome` identifier on a regular web
     * page throws a ReferenceError, which used to abort the handler before
     * the reload. The global is therefore looked up defensively and the
     * clear calls are skipped when it is missing.
     */
    const onClearSettingClicked = async () => {
        type ClearableStorageArea = { clear: () => Promise<void> }
        type ChromeLike = { storage?: { local?: ClearableStorageArea, sync?: ClearableStorageArea } }
        const storage = (globalThis as { chrome?: ChromeLike }).chrome?.storage
        await storage?.local?.clear()
        await storage?.sync?.clear()
        location.reload()
    }

    // The handler captures no state, so the row can be built once.
    const clearRow = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-3-4 left-padding-1">
                    <div className="body-button-container">
                        <div className="body-button" onClick={onClearSettingClicked}>clear setting</div>
                    </div>
                    <div className="body-item-text"></div>
                    <div className="body-item-text"></div>
                </div>
            </>
        )
    }, [])

    return (
        <div className="body">
            <div className="body-row">
                <div className="body-top-title">
                    Voice Changer Setting
                </div>
            </div>
            {clearRow}
            {voiceChangerSetting}
            <div>
                <audio id="audio-output"></audio>
            </div>
        </div>
    )
}
// Render the application into the React root.
root.render(<App />);

View File

@ -0,0 +1,99 @@
import * as React from "react";
import { useEffect, useMemo, useRef, useState } from "react";
import { AUDIO_ELEMENT_FOR_PLAY_RESULT, CHROME_EXTENSION } from "./const";
import { DefaultVoiceChangerRequestParamas, VoiceChangerOptions, VoiceChangerRequestParamas, DefaultVoiceChangerOptions, VoiceChangerMode, } from "@dannadori/voice-changer-client-js"
import { useServerSetting } from "./101_server_setting";
import { useDeviceSetting } from "./102_device_setting";
import { useConvertSetting } from "./104_convert_setting";
import { useAdvancedSetting } from "./105_advanced_setting";
import { useSpeakerSetting } from "./103_speaker_setting";
import { VoiceChnagerClient } from "@dannadori/voice-changer-client-js";
import { useClient } from "./hooks/useClient";
import { useServerControl } from "./106_server_control";
/**
 * Composes the individual setting hooks (server, device, speaker, convert,
 * advanced, server control) with the voice changer client and returns the
 * combined "Virtual Microphone" panel.
 *
 * @returns an object whose `voiceChangerSetting` is the JSX for the whole panel.
 */
export const useMicrophoneOptions = () => {
    const [audioContext, setAudioContext] = useState<AudioContext | null>(null)

    const serverSetting = useServerSetting()
    const deviceSetting = useDeviceSetting(audioContext)
    const speakerSetting = useSpeakerSetting()
    const convertSetting = useConvertSetting()
    const advancedSetting = useAdvancedSetting()

    const clientState = useClient({
        audioContext: audioContext,
        audioOutputElementId: AUDIO_ELEMENT_FOR_PLAY_RESULT
    })
    const serverControl = useServerControl({
        convertStart: async () => { await clientState.start(serverSetting.mmvcServerUrl, serverSetting.protocol) },
        // Await the stop so the returned promise settles only once the client has stopped.
        convertStop: async () => { await clientState.stop() },
        volume: clientState.volume,
        bufferingTime: clientState.bufferingTime,
        responseTime: clientState.responseTime
    })

    // The AudioContext is created lazily on the first touch/mouse interaction
    // (presumably to satisfy browser autoplay restrictions - TODO confirm).
    useEffect(() => {
        const removeListeners = () => {
            document.removeEventListener('touchstart', createAudioContext);
            document.removeEventListener('mousedown', createAudioContext);
        }
        const createAudioContext = () => {
            const ctx = new AudioContext()
            setAudioContext(ctx)
            removeListeners()
        }
        document.addEventListener('touchstart', createAudioContext);
        document.addEventListener('mousedown', createAudioContext);
        // Detach the listeners if the component unmounts before any interaction,
        // otherwise they would stay on the document and fire against an unmounted hook.
        return removeListeners
    }, [])

    // Re-wire the client input whenever the client becomes ready or one of the
    // input-related settings changes.
    useEffect(() => {
        clientState.changeInput(deviceSetting.audioInput, convertSetting.bufferSize, advancedSetting.vfForceDisabled)
    }, [clientState.clientInitialized, deviceSetting.audioInput, convertSetting.bufferSize, advancedSetting.vfForceDisabled])

    // // const [options, setOptions] = useState<MicrophoneOptionsState>(InitMicrophoneOptionsState)
    // const [params, setParams] = useState<VoiceChangerRequestParamas>(DefaultVoiceChangerRequestParamas)
    // const [options, setOptions] = useState<VoiceChangerOptions>(DefaultVoiceChangerOptions)
    // const [isStarted, setIsStarted] = useState<boolean>(false)

    // useEffect(() => {
    //     const storeOptions = async () => {
    //         if (CHROME_EXTENSION) {
    //             // @ts-ignore
    //             await chrome.storage.local.set({ microphoneOptions: options })
    //         }
    //     }
    //     storeOptions()
    // }, [options]) // Moving this ahead of the load step may wipe the stored values, so it might not work. Needs verification.

    const voiceChangerSetting = useMemo(() => {
        return (
            <>
                <div className="body-row left-padding-1">
                    <div className="body-section-title">Virtual Microphone</div>
                </div>
                {serverControl.serverControl}
                {serverSetting.serverSetting}
                {deviceSetting.deviceSetting}
                {speakerSetting.speakerSetting}
                {convertSetting.convertSetting}
                {advancedSetting.advancedSetting}
            </>
        )
    }, [serverControl.serverControl,
    serverSetting.serverSetting,
    deviceSetting.deviceSetting,
    speakerSetting.speakerSetting,
    convertSetting.convertSetting,
    advancedSetting.advancedSetting])

    return {
        voiceChangerSetting,
    }
}

View File

@ -0,0 +1,212 @@
import { DefaultVoiceChangerOptions, OnnxExecutionProvider, Protocol, Framework, fileSelector } from "@dannadori/voice-changer-client-js"
import React from "react"
import { useMemo, useState } from "react"
/** Values returned by `useServerSetting`: the rendered section plus the current selections. */
export type ServerSettingState = {
// Rendered "Server Setting" section.
serverSetting: JSX.Element;
// Server URL; updated when the "set" button is clicked.
mmvcServerUrl: string;
// Selected ".pth" model file, or null while none has been selected.
pyTorchModel: File | null;
// Selected ".json" config file, or null while none has been selected.
configFile: File | null;
// Selected ".onnx" model file, or null while none has been selected.
onnxModel: File | null;
// Currently selected framework (the hook stores it as a `Framework` value).
framework: string;
// Currently selected ONNX execution provider; its selector is shown only when framework is "ONNX".
onnxExecutionProvider: OnnxExecutionProvider;
// Currently selected protocol.
protocol: Protocol;
}
/**
 * Hook behind the "Server Setting" section.
 *
 * Holds the server URL, the selected model/config files, the framework, the
 * ONNX execution provider and the protocol, and renders one row per setting.
 *
 * @returns the rendered section together with the current values.
 */
export const useServerSetting = (): ServerSettingState => {
    const [mmvcServerUrl, setMmvcServerUrl] = useState<string>(DefaultVoiceChangerOptions.mmvcServerUrl)
    const [pyTorchModel, setPyTorchModel] = useState<File | null>(null)
    const [configFile, setConfigFile] = useState<File | null>(null)
    const [onnxModel, setOnnxModel] = useState<File | null>(null)
    const [protocol, setProtocol] = useState<Protocol>("sio")
    const [onnxExecutionProvider, setOnnxExecutionProvider] = useState<OnnxExecutionProvider>("CPUExecutionProvider")
    const [framework, setFramework] = useState<Framework>("PyTorch")

    // Server URL: an uncontrolled text field whose value is committed by the "set" button.
    const mmvcServerUrlRow = useMemo(() => {
        const applyServerUrl = async () => {
            const urlField = document.getElementById("mmvc-server-url") as HTMLInputElement
            setMmvcServerUrl(urlField.value)
        }
        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-1">MMVC Server</div>
                <div className="body-input-container">
                    <input type="text" defaultValue={mmvcServerUrl} id="mmvc-server-url" className="body-item-input" />
                </div>
                <div className="body-button-container">
                    <div className="body-button" onClick={applyServerUrl}>set</div>
                </div>
            </div>
        )
    }, [])

    // Model uploader: three file pickers that differ only in the required extension.
    const uploadeModelRow = useMemo(() => {
        // Opens the file picker and stores the file unless its extension is wrong.
        const pickFile = async (extension: string, rejectMessage: string, store: (file: File) => void) => {
            const picked = await fileSelector("")
            if (!picked.name.endsWith(extension)) {
                alert(rejectMessage)
                return
            }
            store(picked)
        }
        const onPyTorchFileLoadClicked = () => pickFile(".pth", "モデルファイルの拡張子はpthである必要があります。", setPyTorchModel)
        const onConfigFileLoadClicked = () => pickFile(".json", "モデルファイルの拡張子はjsonである必要があります。", setConfigFile)
        const onOnnxFileLoadClicked = () => pickFile(".onnx", "モデルファイルの拡張子はonnxである必要があります。", setOnnxModel)

        return (
            <>
                <div className="body-row split-3-3-4 left-padding-1 guided">
                    <div className="body-item-title left-padding-1">Model Uploader</div>
                    <div className="body-item-text">
                        <div></div>
                    </div>
                    <div className="body-item-text">
                        <div></div>
                    </div>
                </div>
                <div className="body-row split-3-3-4 left-padding-1 guided">
                    <div className="body-item-title left-padding-2">PyTorch(.pth)</div>
                    <div className="body-item-text">
                        <div>{pyTorchModel?.name}</div>
                    </div>
                    <div className="body-button-container">
                        <div className="body-button" onClick={onPyTorchFileLoadClicked}>select</div>
                    </div>
                </div>
                <div className="body-row split-3-3-4 left-padding-1 guided">
                    <div className="body-item-title left-padding-2">Config(.json)</div>
                    <div className="body-item-text">
                        <div>{configFile?.name}</div>
                    </div>
                    <div className="body-button-container">
                        <div className="body-button" onClick={onConfigFileLoadClicked}>select</div>
                    </div>
                </div>
                <div className="body-row split-3-3-4 left-padding-1 guided">
                    <div className="body-item-title left-padding-2">Onnx(.onnx)</div>
                    <div className="body-item-text">
                        <div>{onnxModel?.name}</div>
                    </div>
                    <div className="body-button-container">
                        <div className="body-button" onClick={onOnnxFileLoadClicked}>select</div>
                    </div>
                </div>
            </>
        )
    }, [pyTorchModel, configFile, onnxModel])

    // Protocol selector.
    const protocolRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Protocol</div>
                <div className="body-select-container">
                    <select className="body-select" value={protocol} onChange={(e) => { setProtocol(e.target.value as Protocol) }}>
                        {Object.values(Protocol).map(x => <option key={x} value={x}>{x}</option>)}
                    </select>
                </div>
            </div>
        )
    }, [protocol])

    // Framework selector.
    const frameworkRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Framework</div>
                <div className="body-select-container">
                    <select className="body-select" value={framework} onChange={(e) => { setFramework(e.target.value as Framework) }}>
                        {Object.values(Framework).map(x => <option key={x} value={x}>{x}</option>)}
                    </select>
                </div>
            </div>
        )
    }, [framework])

    // Execution-provider selector; rendered only for the ONNX framework.
    const onnxExecutionProviderRow = useMemo(() => {
        if (framework !== "ONNX") {
            return undefined
        }
        return (
            <div className="body-row split-3-7 left-padding-1">
                <div className="body-item-title left-padding-2">OnnxExecutionProvider</div>
                <div className="body-select-container">
                    <select className="body-select" value={onnxExecutionProvider} onChange={(e) => { setOnnxExecutionProvider(e.target.value as OnnxExecutionProvider) }}>
                        {Object.values(OnnxExecutionProvider).map(x => <option key={x} value={x}>{x}</option>)}
                    </select>
                </div>
            </div>
        )
    }, [onnxExecutionProvider, framework])

    // Section header followed by the rows above.
    const serverSetting = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-7 left-padding-1">
                    <div className="body-sub-section-title">Server Setting</div>
                    <div className="body-select-container">
                    </div>
                </div>
                {mmvcServerUrlRow}
                {uploadeModelRow}
                {frameworkRow}
                {onnxExecutionProviderRow}
                {protocolRow}
            </>
        )
    }, [mmvcServerUrlRow, uploadeModelRow, frameworkRow, onnxExecutionProviderRow, protocolRow])

    return {
        serverSetting,
        mmvcServerUrl,
        pyTorchModel,
        configFile,
        onnxModel,
        framework,
        onnxExecutionProvider,
        protocol,
    }
}

View File

@ -0,0 +1,220 @@
import { fileSelectorAsDataURL, createDummyMediaStream, SampleRate } from "@dannadori/voice-changer-client-js"
import React, { useEffect, useMemo, useState } from "react"
import { AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_CONVERTED, AUDIO_ELEMENT_FOR_TEST_ORIGINAL } from "./const"
/**
 * Enumerates the audio devices known to the browser.
 *
 * @returns a two-element array: `[audioInputs, audioOutputs]`. The input list
 *          has two pseudo devices appended, "none" and "file", which the
 *          device selector offers in addition to the real microphones.
 */
const reloadDevices = async (): Promise<MediaDeviceInfo[][]> => {
    try {
        // Ask for microphone access before enumerating (presumably so that the
        // device list carries labels - TODO confirm). Video is not requested:
        // nothing here uses a camera, and asking for one makes the whole
        // request fail on machines that have no camera.
        const permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true });
        // The stream is needed only for the permission prompt; stop its tracks
        // so the capture device is released again.
        permissionStream.getTracks().forEach(track => { track.stop() })
    } catch (e) {
        // Best effort: enumeration is still attempted without the permission.
        console.warn("Enumerate device error::", e)
    }
    const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();

    // Builds a selector entry that does not correspond to a physical device.
    const pseudoInput = (id: string): MediaDeviceInfo => ({
        deviceId: id,
        groupId: id,
        kind: "audioinput",
        label: id,
        toJSON: () => { }
    })

    const audioInputs = mediaDeviceInfos.filter(x => x.kind === "audioinput")
    audioInputs.push(pseudoInput("none"), pseudoInput("file"))

    const audioOutputs = mediaDeviceInfos.filter(x => x.kind === "audiooutput")
    return [audioInputs, audioOutputs]
}
/** Values returned by `useDeviceSetting`. */
export type DeviceSettingState = {
// Rendered "Device Setting" section.
deviceSetting: JSX.Element;
// Effective input: a device id string, or a MediaStream (the dummy stream for "none", or the stream built from a loaded file).
audioInput: string | MediaStream;
// Currently selected sample rate.
sampleRate: SampleRate;
}
/**
 * Hook behind the "Device Setting" section: audio input (device, "none" or
 * "file"), sample rate and audio output.
 *
 * @param audioContext context used to build the dummy stream and the
 *                     file-playback stream; null until it has been created.
 * @returns the rendered section, the effective audio input and the sample rate.
 */
export const useDeviceSetting = (audioContext: AudioContext | null): DeviceSettingState => {
    const [inputAudioDeviceInfo, setInputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
    const [outputAudioDeviceInfo, setOutputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])

    const [audioInputForGUI, setAudioInputForGUI] = useState<string>("none")
    const [audioInput, setAudioInput] = useState<string | MediaStream>("none")
    const [audioOutputForGUI, setAudioOutputForGUI] = useState<string>("none")
    const [sampleRate, setSampleRate] = useState<SampleRate>(48000)

    // Source node already created for the file-playback <audio> element.
    // Browsers reject a second createMediaElementSource() call for the same
    // element, so the node is kept and reused for subsequent file loads.
    const fileSourceRef = React.useRef<{ element: HTMLAudioElement, node: MediaElementAudioSourceNode } | null>(null)

    // Load the device lists once on mount.
    useEffect(() => {
        const initialize = async () => {
            const audioInfo = await reloadDevices()
            setInputAudioDeviceInfo(audioInfo[0])
            setOutputAudioDeviceInfo(audioInfo[1])
            // if (CHROME_EXTENSION) {
            //     //@ts-ignore
            //     const storedOptions = await chrome.storage.local.get("microphoneOptions")
            //     if (storedOptions) {
            //         setOptions(storedOptions)
            //     }
            // }
        }
        initialize()
    }, [])

    const audioInputRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">AudioInput</div>
                <div className="body-select-container">
                    <select className="body-select" value={audioInputForGUI} onChange={(e) => { setAudioInputForGUI(e.target.value) }}>
                        {
                            inputAudioDeviceInfo.map(x => {
                                return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [inputAudioDeviceInfo, audioInputForGUI])

    // Translate the GUI selection into the effective input.
    useEffect(() => {
        if (!audioContext) {
            return
        }
        if (audioInputForGUI == "none") {
            setAudioInput(createDummyMediaStream(audioContext))
        } else if (audioInputForGUI == "file") {
            // The input is set by the file loader in audioMediaInputRow.
        } else {
            setAudioInput(audioInputForGUI)
        }
    }, [audioContext, audioInputForGUI])

    // Shown only for the "file" input: loads an audio file and feeds its playback to the client.
    const audioMediaInputRow = useMemo(() => {
        if (audioInputForGUI != "file") {
            return <></>
        }

        const onFileLoadClicked = async () => {
            if (!audioContext) {
                console.warn("audio context is not initialized")
                return
            }
            const url = await fileSelectorAsDataURL("")

            // input stream for client.
            const audio = document.getElementById(AUDIO_ELEMENT_FOR_TEST_CONVERTED) as HTMLAudioElement
            audio.src = url
            await audio.play()

            let source = fileSourceRef.current
            if (!source || source.element !== audio || source.node.context !== audioContext) {
                // First load for this element (it is re-created whenever the row is re-mounted).
                source = { element: audio, node: audioContext.createMediaElementSource(audio) }
                fileSourceRef.current = source
            } else {
                // Drop the connection to the destination of the previous load.
                source.node.disconnect()
            }
            const dst = audioContext.createMediaStreamDestination()
            source.node.connect(dst)
            setAudioInput(dst.stream)

            // original stream to play.
            const audio_org = document.getElementById(AUDIO_ELEMENT_FOR_TEST_ORIGINAL) as HTMLAudioElement
            audio_org.src = url
            audio_org.pause()

            // audio_org.onplay = () => {
            //     console.log(audioOutputRef.current)
            //     // @ts-ignore
            //     audio_org.setSinkId(audioOutputRef.current)
            // }
        }

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title"></div>
                <div className="body-item-text">
                    <div>
                        org:<audio id={AUDIO_ELEMENT_FOR_TEST_ORIGINAL} controls></audio>
                    </div>
                    <div>
                        cnv:<audio id={AUDIO_ELEMENT_FOR_TEST_CONVERTED} controls></audio>
                    </div>
                </div>
                <div className="body-button-container">
                    <div className="body-button" onClick={onFileLoadClicked}>load</div>
                </div>
            </div>
        )
        // audioContext is a dependency: the click handler closes over it.
    }, [audioInputForGUI, audioContext])

    const audioOutputRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">AudioOutput</div>
                <div className="body-select-container">
                    <select className="body-select" value={audioOutputForGUI} onChange={(e) => { setAudioOutputForGUI(e.target.value) }}>
                        {
                            outputAudioDeviceInfo.map(x => {
                                return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
                            })
                        }
                    </select>
                    <audio hidden id={AUDIO_ELEMENT_FOR_PLAY_RESULT}></audio>
                </div>
            </div>
        )
    }, [outputAudioDeviceInfo, audioOutputForGUI])

    // Route the playback elements to the selected output device.
    useEffect(() => {
        // "none" is only the initial placeholder, not a real device id; passing
        // it to setSinkId would produce a rejected promise nobody handles.
        if (audioOutputForGUI == "none") {
            return
        }
        [AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_ORIGINAL].forEach(x => {
            const audio = document.getElementById(x) as (HTMLAudioElement & { setSinkId?: (sinkId: string) => Promise<void> }) | null
            if (audio) {
                // setSinkId may be missing from the DOM typings or the browser, hence the optional call.
                audio.setSinkId?.(audioOutputForGUI).catch((e: unknown) => {
                    console.warn("setSinkId error::", e)
                })
            }
        })
    }, [audioOutputForGUI])

    const sampleRateRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Sample Rate</div>
                <div className="body-select-container">
                    <select className="body-select" value={sampleRate} onChange={(e) => { setSampleRate(Number(e.target.value) as SampleRate) }}>
                        {
                            Object.values(SampleRate).map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [sampleRate])

    const deviceSetting = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-7 left-padding-1">
                    <div className="body-sub-section-title">Device Setting</div>
                    <div className="body-select-container">
                    </div>
                </div>
                {audioInputRow}
                {audioMediaInputRow}
                {sampleRateRow}
                {audioOutputRow}
            </>
        )
    }, [audioInputRow, audioMediaInputRow, sampleRateRow, audioOutputRow])

    return {
        deviceSetting,
        audioInput,
        sampleRate,
    }
}

View File

@ -0,0 +1,117 @@
import { DefaultVoiceChangerRequestParamas, DefaultVoiceChangerOptions, Speaker } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
/**
 * Hook behind the "Speaker Setting" section: source/destination speaker
 * selection and an editor for the id-to-name speaker mapping.
 *
 * @returns the rendered section plus the selected `srcId` and `dstId`.
 */
export const useSpeakerSetting = () => {
    const [speakers, setSpeakers] = useState<Speaker[]>(DefaultVoiceChangerOptions.speakers)
    const [editSpeakerTargetId, setEditSpeakerTargetId] = useState<number>(0)
    const [editSpeakerTargetName, setEditSpeakerTargetName] = useState<string>("")

    const [srcId, setSrcId] = useState<number>(DefaultVoiceChangerRequestParamas.srcId)
    const [dstId, setDstId] = useState<number>(DefaultVoiceChangerRequestParamas.dstId)

    const srcIdRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Source Speaker Id</div>
                <div className="body-select-container">
                    <select className="body-select" value={srcId} onChange={(e) => { setSrcId(Number(e.target.value)) }}>
                        {
                            speakers.map(x => {
                                return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [srcId, speakers])

    const dstIdRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Destination Speaker Id</div>
                <div className="body-select-container">
                    <select className="body-select" value={dstId} onChange={(e) => { setDstId(Number(e.target.value)) }}>
                        {
                            speakers.map(x => {
                                return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [dstId, speakers])

    const editSpeakerIdMappingRow = useMemo(() => {
        // Applies the edit form to the mapping:
        //   existing id + empty name -> delete, existing id + name -> rename,
        //   new id + name -> add, new id + empty name -> nothing.
        // Every branch builds a new array/object. The initial state is the
        // shared DefaultVoiceChangerOptions.speakers array, so mutating it in
        // place would leak edits into the library defaults.
        const onSetSpeakerMappingClicked = async () => {
            const targetId = editSpeakerTargetId
            const targetName = editSpeakerTargetName
            const exists = speakers.some(x => x.id === targetId)

            if (targetName.length === 0) {
                if (exists) {
                    setSpeakers(speakers.filter(x => x.id !== targetId))
                }
                return
            }
            if (exists) {
                setSpeakers(speakers.map(x => (x.id === targetId ? { ...x, name: targetName } : x)))
            } else {
                setSpeakers([...speakers, { id: targetId, name: targetName }])
            }
        }
        return (
            <div className="body-row split-3-1-2-4 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Edit Speaker Mapping</div>
                <div className="body-input-container">
                    <input type="number" min={1} max={256} step={1} value={editSpeakerTargetId} onChange={(e) => {
                        const id = Number(e.target.value)
                        setEditSpeakerTargetId(id)
                        // Pre-fill the name field with the current name of that id, if any.
                        setEditSpeakerTargetName(speakers.find(x => x.id === id)?.name ?? "")
                    }} />
                </div>
                <div className="body-input-container">
                    <input type="text" value={editSpeakerTargetName} onChange={(e) => { setEditSpeakerTargetName(e.target.value) }} />
                </div>
                <div className="body-button-container">
                    <div className="body-button" onClick={onSetSpeakerMappingClicked}>set</div>
                </div>
            </div>
        )
        // The two edit fields are controlled inputs, so their state must be in
        // the dependency list; otherwise the memoized row keeps rendering the
        // old values and the inputs cannot be edited.
    }, [speakers, editSpeakerTargetId, editSpeakerTargetName])

    const speakerSetting = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-7 left-padding-1">
                    <div className="body-sub-section-title">Speaker Setting</div>
                    <div className="body-select-container">
                    </div>
                </div>
                {srcIdRow}
                {dstIdRow}
                {editSpeakerIdMappingRow}
            </>
        )
    }, [srcIdRow, dstIdRow, editSpeakerIdMappingRow])

    return {
        speakerSetting,
        srcId,
        dstId,
    }
}

View File

@ -0,0 +1,128 @@
import { DefaultVoiceChangerRequestParamas, DefaultVoiceChangerOptions, BufferSize } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
/**
 * Values returned by `useConvertSetting`.
 * NOTE(review): despite its name this type describes the converter settings, not the speaker settings.
 */
export type SpeakerSettingState = {
// Rendered "Converter Setting" section.
convertSetting: JSX.Element;
// Currently selected buffer size.
bufferSize: BufferSize;
// Value of the "Input Chunk Num(128sample/chunk)" field.
inputChunkNum: number;
// Value of the "Convert Chunk Num(128sample/chunk)" field.
convertChunkNum: number;
// Value of the "GPU" field (the input accepts -2 to 5).
gpu: number;
// Value of the "Cross Fade Offset Rate" field (the input accepts 0 to 1).
crossFadeOffsetRate: number;
// Value of the "Cross Fade End Rate" field (the input accepts 0 to 1).
crossFadeEndRate: number;
}
/**
 * Hook behind the "Converter Setting" section: buffer size, chunk counts,
 * GPU index and cross-fade rates.
 *
 * @returns the rendered section together with the current values.
 */
export const useConvertSetting = (): SpeakerSettingState => {
    const [bufferSize, setBufferSize] = useState<BufferSize>(1024)
    const [inputChunkNum, setInputChunkNum] = useState<number>(DefaultVoiceChangerOptions.inputChunkNum)
    const [convertChunkNum, setConvertChunkNum] = useState<number>(DefaultVoiceChangerRequestParamas.convertChunkNum)
    const [gpu, setGpu] = useState<number>(DefaultVoiceChangerRequestParamas.gpu)

    const [crossFadeOffsetRate, setCrossFadeOffsetRate] = useState<number>(DefaultVoiceChangerRequestParamas.crossFadeOffsetRate)
    const [crossFadeEndRate, setCrossFadeEndRate] = useState<number>(DefaultVoiceChangerRequestParamas.crossFadeEndRate)

    // All numeric settings share one row layout; only the label, the bounds and the bound state differ.
    const numberRow = (label: string, min: number, max: number, step: number, current: number, store: (next: number) => void) => (
        <div className="body-row split-3-7 left-padding-1 guided">
            <div className="body-item-title left-padding-1">{label}</div>
            <div className="body-input-container">
                <input type="number" min={min} max={max} step={step} value={current} onChange={(e) => { store(Number(e.target.value)) }} />
            </div>
        </div>
    )

    const bufferSizeRow = useMemo(() => {
        const sizeOptions = Object.values(BufferSize).map(x => <option key={x} value={x}>{x}</option>)
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Buffer Size</div>
                <div className="body-select-container">
                    <select className="body-select" value={bufferSize} onChange={(e) => { setBufferSize(Number(e.target.value) as BufferSize) }}>
                        {sizeOptions}
                    </select>
                </div>
            </div>
        )
    }, [bufferSize])

    const inputChunkNumRow = useMemo(
        () => numberRow("Input Chunk Num(128sample/chunk)", 1, 256, 1, inputChunkNum, setInputChunkNum),
        [inputChunkNum])

    const convertChunkNumRow = useMemo(
        () => numberRow("Convert Chunk Num(128sample/chunk)", 1, 256, 1, convertChunkNum, setConvertChunkNum),
        [convertChunkNum])

    const gpuRow = useMemo(
        () => numberRow("GPU", -2, 5, 1, gpu, setGpu),
        [gpu])

    const crossFadeOffsetRateRow = useMemo(
        () => numberRow("Cross Fade Offset Rate", 0, 1, 0.1, crossFadeOffsetRate, setCrossFadeOffsetRate),
        [crossFadeOffsetRate])

    const crossFadeEndRateRow = useMemo(
        () => numberRow("Cross Fade End Rate", 0, 1, 0.1, crossFadeEndRate, setCrossFadeEndRate),
        [crossFadeEndRate])

    // Section header followed by the rows above.
    const convertSetting = useMemo(() => (
        <>
            <div className="body-row split-3-7 left-padding-1">
                <div className="body-sub-section-title">Converter Setting</div>
                <div className="body-select-container">
                </div>
            </div>
            {bufferSizeRow}
            {inputChunkNumRow}
            {convertChunkNumRow}
            {gpuRow}
            {crossFadeOffsetRateRow}
            {crossFadeEndRateRow}
        </>
    ), [bufferSizeRow, inputChunkNumRow, convertChunkNumRow, gpuRow, crossFadeOffsetRateRow, crossFadeEndRateRow])

    return {
        convertSetting,
        bufferSize,
        inputChunkNum,
        convertChunkNum,
        gpu,
        crossFadeOffsetRate,
        crossFadeEndRate,
    }
}

View File

@ -0,0 +1,68 @@
import { VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
/** Values returned by `useAdvancedSetting`. */
export type AdvancedSettingState = {
// Rendered "Advanced Setting" section.
advancedSetting: JSX.Element;
// State of the "VF Disabled" checkbox.
vfForceDisabled: boolean;
// Currently selected voice change mode.
voiceChangeMode: VoiceChangerMode;
}
/**
 * Hook behind the "Advanced Setting" section: the "VF Disabled" checkbox and
 * the voice change mode selector.
 *
 * @returns the rendered section together with the current values.
 */
export const useAdvancedSetting = (): AdvancedSettingState => {
    const [vfForceDisabled, setVfForceDisabled] = useState<boolean>(false)
    const [voiceChangeMode, setVoiceChangeMode] = useState<VoiceChangerMode>("realtime")

    const vfForceDisableRow = useMemo(() => {
        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">VF Disabled</div>
                <div>
                    <input type="checkbox" checked={vfForceDisabled} onChange={(e) => setVfForceDisabled(e.target.checked)} />
                </div>
                <div className="body-button-container">
                </div>
            </div>
        )
    }, [vfForceDisabled])

    const voiceChangeModeRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">Voice Change Mode</div>
                <div className="body-select-container">
                    <select className="body-select" value={voiceChangeMode} onChange={(e) => { setVoiceChangeMode(e.target.value as VoiceChangerMode) }}>
                        {
                            Object.values(VoiceChangerMode).map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
        // The select is controlled by voiceChangeMode, so the row must be
        // rebuilt when it changes; with an empty dependency list the select
        // kept showing the initial mode regardless of what was chosen.
    }, [voiceChangeMode])

    const advancedSetting = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-7 left-padding-1">
                    <div className="body-sub-section-title">Advanced Setting</div>
                    <div className="body-select-container">
                    </div>
                </div>
                {vfForceDisableRow}
                {voiceChangeModeRow}
            </>
        )
    }, [vfForceDisableRow, voiceChangeModeRow])

    return {
        advancedSetting,
        vfForceDisabled,
        voiceChangeMode,
    }
}

View File

@ -0,0 +1,77 @@
import { VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
/** Inputs of `useServerControl`: the start/stop actions and the figures shown in the monitor row. */
export type UseServerControlProps = {
// Invoked when the "start" button is clicked.
convertStart: () => Promise<void>
// Invoked when the "stop" button is clicked.
convertStop: () => Promise<void>
// Shown as "vol(rms)" with four decimals.
volume: number,
// Shown as "buf(ms)".
bufferingTime: number,
// Shown as "res(ms)".
responseTime: number
}
/**
 * Hook behind the "Server Control" section: the start/stop buttons and the
 * monitor row (volume, buffering time, response time).
 *
 * @param props start/stop callbacks and the monitor values to display.
 * @returns an object whose `serverControl` is the rendered section.
 */
export const useServerControl = (props: UseServerControlProps) => {
    const [isStarted, setIsStarted] = useState<boolean>(false)

    const startButtonRow = useMemo(() => {
        const onStartClicked = async () => {
            setIsStarted(true)
            await props.convertStart()
        }
        const onStopClicked = async () => {
            setIsStarted(false)
            await props.convertStop()
        }
        // Highlight whichever button reflects the current state.
        const startClassName = isStarted ? "body-button-active" : "body-button-stanby"
        const stopClassName = isStarted ? "body-button-stanby" : "body-button-active"

        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Start</div>
                <div className="body-button-container">
                    <div onClick={onStartClicked} className={startClassName}>start</div>
                    <div onClick={onStopClicked} className={stopClassName}>stop</div>
                </div>
                <div className="body-input-container">
                </div>
            </div>
        )
        // The callbacks are dependencies: the caller passes closures over its
        // current settings (e.g. server URL and protocol). Memoizing on
        // isStarted alone kept the first closures alive, so "start" ran with
        // outdated settings.
    }, [isStarted, props.convertStart, props.convertStop])

    const performanceRow = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-1-1-1-4 left-padding-1 guided">
                    <div className="body-item-title left-padding-1">monitor:</div>
                    <div className="body-item-text">vol(rms):{props.volume.toFixed(4)}</div>
                    <div className="body-item-text">buf(ms):{props.bufferingTime}</div>
                    <div className="body-item-text">res(ms):{props.responseTime}</div>
                    <div className="body-item-text"></div>
                </div>
            </>
        )
    }, [props.volume, props.bufferingTime, props.responseTime])

    const serverControl = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-7 left-padding-1">
                    <div className="body-sub-section-title">Server Control</div>
                    <div className="body-select-container">
                    </div>
                </div>
                {startButtonRow}
                {performanceRow}
            </>
        )
    }, [startButtonRow, performanceRow])

    return {
        serverControl,
    }
}

View File

@ -1 +1,6 @@
// Switch for Chrome-extension-specific behavior.
// NOTE(review): the only references visible here are inside commented-out storage code - confirm where it is still used.
export const CHROME_EXTENSION = false
// DOM id of the hidden <audio> element that plays the voice changer client's output stream.
export const AUDIO_ELEMENT_FOR_PLAY_RESULT = "audio-result"
// DOM id of the <audio> element that holds the unconverted copy of a loaded test file.
export const AUDIO_ELEMENT_FOR_TEST_ORIGINAL = "audio-test-original"
// DOM id of the <audio> element whose playback is routed to the client as the "file" input.
export const AUDIO_ELEMENT_FOR_TEST_CONVERTED = "audio-test-converted"

View File

@ -43,7 +43,8 @@ body {
height: 100%;
width: 100%;
color: var(--text-color);
background: linear-gradient(45deg, var(--company-color1) 0, 5%, var(--company-color2) 5% 10%, var(--company-color3) 10% 80%, var(--company-color1) 80% 85%, var(--company-color2) 85% 100%);
/* background: linear-gradient(45deg, var(--company-color1) 0, 5%, var(--company-color2) 5% 10%, var(--company-color3) 10% 80%, var(--company-color1) 80% 85%, var(--company-color2) 85% 100%); */
background: linear-gradient(45deg, var(--company-color1) 0, 1%, var(--company-color2) 1% 5%, var(--company-color3) 5% 80%, var(--company-color1) 80% 85%, var(--company-color2) 85% 100%);
}
#app {
height: 100%;
@ -194,7 +195,12 @@ body {
padding-left: 2rem;
}
.highlight {
background-color: rgba(200, 200, 255, 0.1);
background-color: rgba(200, 200, 255, 0.3);
}
.guided{
/* background-color: rgba(9, 133, 67, 0.3); */
background-color: rgba(159, 165, 162, 0.1);
/* border-bottom: 1px solid rgba(9, 133, 67, 0.3); */
}
.body-top-title {
@ -204,6 +210,12 @@ body {
font-size: 1.5rem;
color: rgb(51, 49, 49);
}
.body-sub-section-title {
font-size: 1.1rem;
font-weight: 700;
color: rgb(3, 53, 12);
}
.body-item-title {
color: rgb(51, 99, 49);
}

View File

@ -0,0 +1,149 @@
import { BufferSize, createDummyMediaStream, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VoiceChnagerClient } from "@dannadori/voice-changer-client-js"
import { useEffect, useMemo, useRef, useState } from "react"
/** Inputs of `useClient`. */
export type UseClientProps = {
// Context the client is created with; while it is null no client is created.
audioContext: AudioContext | null
// DOM id of the <audio> element that receives the client's output stream.
audioOutputElementId: string
}
/** State and actions exposed by `useClient`. */
export type ClientState = {
// True once the client has finished initializing.
clientInitialized: boolean
// Latest value reported through the client's notifySendBufferingTime callback.
bufferingTime: number;
// Latest value reported through the client's notifyResponseTime callback.
responseTime: number;
// Latest value reported through the client's notifyVolume callback.
volume: number;
// Sets the server URL and protocol on the client and starts it; logs and returns if the client is not initialized.
start: (mmvcServerUrl: string, protocol: Protocol) => Promise<void>;
// Stops the client; does nothing if the client is not initialized.
stop: () => Promise<void>;
// Switches the client's audio input (a device id or a MediaStream) with the given buffer size and VF flag.
changeInput: (audioInput: MediaStream | string, bufferSize: BufferSize, vfForceDisable: boolean) => Promise<void>
// NOTE(review): implementation not visible in this excerpt - presumably forwards the value to the client.
changeInputChunkNum: (inputChunkNum: number) => void
// NOTE(review): implementation not visible in this excerpt - presumably forwards the value to the client.
changeVoiceChangeMode: (voiceChangerMode: VoiceChangerMode) => void
// NOTE(review): implementation not visible in this excerpt - presumably forwards the value to the client.
changeRequestParams: (params: VoiceChangerRequestParamas) => void
}
/**
 * React hook that creates one `VoiceChnagerClient` for the given audio context and
 * exposes its monitoring values and control operations.
 *
 * The client is created when `props.audioContext` becomes non-null. Once it reports
 * itself initialized, its output stream is attached to the element whose id is
 * `props.audioOutputElementId`.
 *
 * All returned operations are no-ops (some log a message) while the client is not
 * initialized yet.
 *
 * @param props audio context and the id of the output audio element
 * @returns the current monitoring values and stable control callbacks
 */
export const useClient = (props: UseClientProps): ClientState => {
    const voiceChangerClientRef = useRef<VoiceChnagerClient | null>(null)
    const [clientInitialized, setClientInitialized] = useState<boolean>(false)

    const [bufferingTime, setBufferingTime] = useState<number>(0)
    const [responseTime, setResponseTime] = useState<number>(0)
    const [volume, setVolume] = useState<number>(0)

    useEffect(() => {
        const initialized = async () => {
            if (!props.audioContext) {
                return
            }
            const voiceChangerClient = new VoiceChnagerClient(props.audioContext, true, {
                notifySendBufferingTime: (val: number) => {
                    setBufferingTime(val)
                },
                notifyResponseTime: (val: number) => {
                    setResponseTime(val)
                },
                notifyException: (mes: string) => {
                    if (mes.length > 0) {
                        console.log(`error:${mes}`)
                    }
                }
            }, {
                notifyVolume: (vol: number) => {
                    setVolume(vol)
                }
            })
            await voiceChangerClient.isInitialized()
            voiceChangerClientRef.current = voiceChangerClient
            console.log("client initialized!!")
            setClientInitialized(true)

            // getElementById returns null when the element is not mounted (yet);
            // guard instead of crashing on `null.srcObject`.
            const audio = document.getElementById(props.audioOutputElementId) as HTMLAudioElement | null
            if (!audio) {
                console.warn(`audio output element not found: ${props.audioOutputElementId}`)
                return
            }
            audio.srcObject = voiceChangerClient.stream
            // play() returns a promise that may reject (e.g. playback blocked by the browser);
            // report it instead of leaving an unhandled rejection.
            audio.play().catch((e: unknown) => {
                console.warn("audio output could not be started", e)
            })
        }
        initialized().catch((e: unknown) => {
            console.error("client initialization failed", e)
        })
    }, [props.audioContext])

    // The callbacks below only touch the ref, so they never need to be re-created.
    const start = useMemo(() => {
        return async (mmvcServerUrl: string, protocol: Protocol) => {
            const client = voiceChangerClientRef.current
            if (!client) {
                console.log("client is not initialized")
                return
            }
            client.setServerUrl(mmvcServerUrl, protocol, true)
            client.start()
        }
    }, [])

    const stop = useMemo(() => {
        return async () => {
            const client = voiceChangerClientRef.current
            if (!client) {
                return
            }
            client.stop()
        }
    }, [])

    const changeInput = useMemo(() => {
        return async (audioInput: MediaStream | string, bufferSize: BufferSize, vfForceDisable: boolean) => {
            const client = voiceChangerClientRef.current
            if (!client || !props.audioContext) {
                console.log("not initialized", client, props.audioContext)
                return
            }
            if (!audioInput || audioInput === "none") {
                // No real input selected: feed the client a dummy stream.
                console.log("setup! 1")
                const ms = createDummyMediaStream(props.audioContext)
                await client.setup(ms, bufferSize, vfForceDisable)
            } else {
                console.log("setup! 2")
                await client.setup(audioInput, bufferSize, vfForceDisable)
            }
        }
    }, [props.audioContext])

    const changeInputChunkNum = useMemo(() => {
        return (inputChunkNum: number) => {
            const client = voiceChangerClientRef.current
            if (!client) {
                return
            }
            client.setInputChunkNum(inputChunkNum)
        }
    }, [])

    const changeVoiceChangeMode = useMemo(() => {
        return (voiceChangerMode: VoiceChangerMode) => {
            const client = voiceChangerClientRef.current
            if (!client) {
                return
            }
            client.setVoiceChangerMode(voiceChangerMode)
        }
    }, [])

    const changeRequestParams = useMemo(() => {
        return (params: VoiceChangerRequestParamas) => {
            const client = voiceChangerClientRef.current
            if (!client) {
                return
            }
            client.setRequestParams(params)
        }
    }, [])

    return {
        clientInitialized,
        bufferingTime,
        responseTime,
        volume,

        start,
        stop,
        changeInput,
        changeInputChunkNum,
        changeVoiceChangeMode,
        changeRequestParams,
    }
}

View File

@ -1,179 +0,0 @@
import * as React from "react";
import { createRoot } from "react-dom/client";
import "./css/App.css"
import { useEffect, useMemo, useRef, useState } from "react";
import { VoiceChnagerClient, createDummyMediaStream } from "@dannadori/voice-changer-client-js"
import { useMicrophoneOptions } from "./options_microphone";
// Mount point: the element with id "app" must exist in the hosting HTML page.
const container = document.getElementById("app")!;
const root = createRoot(container);
// Root component: creates the AudioContext and the VoiceChnagerClient, forwards the
// settings produced by useMicrophoneOptions to the client, and renders the
// "clear setting" row, the performance monitor row and the settings component.
const App = () => {
const audioContextRef = useRef<AudioContext>()
const voiceChangerClientRef = useRef<VoiceChnagerClient | null>(null)
const [clientInitialized, setClientInitialized] = useState<boolean>(false)
const [bufferingTime, setBufferingTime] = useState<number>(0)
const [responseTime, setResponseTime] = useState<number>(0)
const [volume, setVolume] = useState<number>(0)
// NOTE(review): audioContextRef.current is undefined on the first render and is only
// assigned inside the effect below; the hook sees it on a later render — confirm this is intended.
const { component: microphoneSettingComponent, options: microphoneOptions, params: microphoneParams, isStarted } = useMicrophoneOptions(audioContextRef.current)
// Clears both chrome.storage areas and reloads the page.
const onClearSettingClicked = async () => {
//@ts-ignore
await chrome.storage.local.clear();
//@ts-ignore
await chrome.storage.sync.clear();
location.reload()
}
// One-time setup: create the audio context and the client, wait until the client is
// initialized, then play the client's stream through the "audio-output" element.
useEffect(() => {
const initialized = async () => {
audioContextRef.current = new AudioContext()
voiceChangerClientRef.current = new VoiceChnagerClient(audioContextRef.current, true, {
notifySendBufferingTime: (val: number) => {
setBufferingTime(val)
},
notifyResponseTime: (val: number) => {
setResponseTime(val)
},
notifyException: (mes: string) => {
if (mes.length > 0) {
console.log(`error:${mes}`)
}
}
}, {
notifyVolume: (vol: number) => {
setVolume(vol)
}
})
await voiceChangerClientRef.current.isInitialized()
setClientInitialized(true)
const audio = document.getElementById("audio-output") as HTMLAudioElement
audio.srcObject = voiceChangerClientRef.current.stream
audio.play()
}
initialized()
}, [])
// Starts or stops the client whenever isStarted changes.
// NOTE(review): this and the effects below read clientInitialized without listing it
// as a dependency — confirm they do not need to re-run once initialization completes.
useEffect(() => {
const start = async () => {
if (!voiceChangerClientRef.current || !clientInitialized) {
// console.log("client is not initialized")
return
}
// if (!microphoneOptions.audioInputDeviceId || microphoneOptions.audioInputDeviceId.length == 0) {
// console.log("audioInputDeviceId is not initialized")
// return
// }
// await voiceChangerClientRef.current.setup(microphoneOptions.audioInputDeviceId!, microphoneOptions.bufferSize)
voiceChangerClientRef.current.setServerUrl(microphoneOptions.mmvcServerUrl, microphoneOptions.protocol, true)
voiceChangerClientRef.current.start()
}
const stop = async () => {
if (!voiceChangerClientRef.current || !clientInitialized) {
// console.log("client is not initialized")
return
}
voiceChangerClientRef.current.stop()
}
if (isStarted) {
start()
} else {
stop()
}
}, [isStarted])
// Re-runs the client's setup when the input, buffer size or VF flag changes;
// a missing input or "none" is replaced by a dummy media stream.
useEffect(() => {
const changeInput = async () => {
if (!voiceChangerClientRef.current || !clientInitialized) {
// console.log("client is not initialized")
return
}
if (!microphoneOptions.audioInput || microphoneOptions.audioInput == "none") {
const ms = createDummyMediaStream(audioContextRef.current!)
await voiceChangerClientRef.current.setup(ms, microphoneOptions.bufferSize, microphoneOptions.forceVfDisable)
} else {
await voiceChangerClientRef.current.setup(microphoneOptions.audioInput, microphoneOptions.bufferSize, microphoneOptions.forceVfDisable)
}
}
changeInput()
}, [microphoneOptions.audioInput, microphoneOptions.bufferSize, microphoneOptions.forceVfDisable])
// Forwards the input chunk number to the client.
useEffect(() => {
if (!voiceChangerClientRef.current || !clientInitialized) {
// console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setInputChunkNum(microphoneOptions.inputChunkNum)
}, [microphoneOptions.inputChunkNum])
// Forwards the voice changer mode to the client.
useEffect(() => {
if (!voiceChangerClientRef.current || !clientInitialized) {
// console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setVoiceChangerMode(microphoneOptions.voiceChangerMode)
}, [microphoneOptions.voiceChangerMode])
// Forwards the request parameters to the client.
useEffect(() => {
if (!voiceChangerClientRef.current || !clientInitialized) {
// console.log("client is not initialized")
return
}
voiceChangerClientRef.current.setRequestParams(microphoneParams)
}, [microphoneParams])
// Row with the "clear" button; memoized once because it does not depend on state.
const clearRow = useMemo(() => {
return (
<>
<div className="body-row split-3-3-4 left-padding-1 highlight">
<div className="body-item-title">Clear Setting</div>
<div className="body-item-text"></div>
<div className="body-button-container">
<div className="body-button" onClick={onClearSettingClicked}>clear</div>
</div>
</div>
</>
)
}, [])
// Monitor row showing the latest volume, buffering time and response time.
const performanceRow = useMemo(() => {
return (
<>
<div className="body-row split-3-1-1-1-4 left-padding-1 highlight">
<div className="body-item-title">monitor:</div>
<div className="body-item-text">vol(rms):{volume.toFixed(4)}</div>
<div className="body-item-text">buf(ms):{bufferingTime}</div>
<div className="body-item-text">res(ms):{responseTime}</div>
<div className="body-item-text"></div>
</div>
</>
)
}, [volume, bufferingTime, responseTime])
return (
<div className="body">
<div className="body-row">
<div className="body-top-title">
Voice Changer Setting
</div>
</div>
{clearRow}
{performanceRow}
{microphoneSettingComponent}
<div>
<audio id="audio-output"></audio>
</div>
</div>
)
}
// Render the application into the mount point.
root.render(
<App></App>
);

View File

@ -1,462 +0,0 @@
import * as React from "react";
import { useEffect, useMemo, useState } from "react";
import { CHROME_EXTENSION } from "./const";
import { DefaultVoiceChangerRequestParamas, VoiceChangerOptions, VoiceChangerRequestParamas, DefaultVoiceChangerOptions, SampleRate, BufferSize, VoiceChangerMode, Protocol, fileSelectorAsDataURL, createDummyMediaStream } from "@dannadori/voice-changer-client-js"
/**
 * Enumerates the audio devices known to the browser.
 *
 * Microphone access is requested first so that the user gets the permission prompt
 * before the devices are listed. A failure of that request is only logged and
 * enumeration still proceeds.
 *
 * Two pseudo input devices are appended to the real inputs: `"none"` and `"file"`.
 *
 * @returns a two-element array: `[audioInputs, audioOutputs]`
 */
const reloadDevices = async () => {
    try {
        // Only audio devices are returned below, so do not ask for camera access:
        // requesting video triggers a needless prompt and fails on machines without a camera.
        const permissionStream = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
        // The stream is needed only for the permission prompt; release the microphone right away.
        permissionStream.getTracks().forEach(track => { track.stop() })
    } catch (e) {
        console.warn("Enumerate device error::", e)
    }
    const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();

    const audioInputs = mediaDeviceInfos.filter(x => x.kind === "audioinput")
    // Pseudo devices selectable in the input list in addition to the real microphones.
    audioInputs.push({
        deviceId: "none",
        groupId: "none",
        kind: "audioinput",
        label: "none",
        toJSON: () => { }
    })
    audioInputs.push({
        deviceId: "file",
        groupId: "file",
        kind: "audioinput",
        label: "file",
        toJSON: () => { }
    })
    const audioOutputs = mediaDeviceInfos.filter(x => x.kind === "audiooutput")
    return [audioInputs, audioOutputs]
}
/**
 * Result of the `useMicrophoneOptions` hook.
 */
export type MicrophoneOptionsComponent = {
    /** Rendered settings UI. */
    component: JSX.Element;
    /** Current voice changer options edited through the UI. */
    options: VoiceChangerOptions;
    /** Current request parameters edited through the UI. */
    params: VoiceChangerRequestParamas;
    /** `true` after "start" was clicked, `false` after "stop". */
    isStarted: boolean;
}
// Hook that owns the voice changer settings (options and request params), the
// start/stop flag and the audio device lists, and builds the settings UI for them.
// Returns the rendered component together with the current options, params and isStarted.
export const useMicrophoneOptions = (audioContext?: AudioContext): MicrophoneOptionsComponent => {
// GUI Info
const [inputAudioDeviceInfo, setInputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [outputAudioDeviceInfo, setOutputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [editSpeakerTargetId, setEditSpeakerTargetId] = useState<number>(0)
const [editSpeakerTargetName, setEditSpeakerTargetName] = useState<string>("")
const [audioInput, setAudioInput] = useState<string>("none")
// The selected output id is mirrored in a ref so that event handlers registered
// earlier (see audio_org.onplay below) read the latest value.
const audioOutputRef = React.useRef<string>("")
const [audioOutput, _setAudioOutput] = useState<string>("none")
// Stores the selected output device id and routes the "audio-output" element to it.
const setAudioOutput = (id: string) => {
audioOutputRef.current = id
_setAudioOutput(audioOutputRef.current)
const audio = document.getElementById("audio-output") as HTMLAudioElement
//@ts-ignore
audio.setSinkId(audioOutputRef.current)
}
// const [options, setOptions] = useState<MicrophoneOptionsState>(InitMicrophoneOptionsState)
const [params, setParams] = useState<VoiceChangerRequestParamas>(DefaultVoiceChangerRequestParamas)
const [options, setOptions] = useState<VoiceChangerOptions>(DefaultVoiceChangerOptions)
const [isStarted, setIsStarted] = useState<boolean>(false)
// On mount: load the device lists and, when running as a Chrome extension,
// restore the stored options.
// NOTE(review): the value resolved by chrome.storage.local.get("microphoneOptions") is
// passed to setOptions as-is; presumably it is an object keyed by "microphoneOptions"
// and needs unwrapping — confirm against the chrome.storage API.
useEffect(() => {
const initialize = async () => {
const audioInfo = await reloadDevices()
setInputAudioDeviceInfo(audioInfo[0])
setOutputAudioDeviceInfo(audioInfo[1])
if (CHROME_EXTENSION) {
//@ts-ignore
const storedOptions = await chrome.storage.local.get("microphoneOptions")
if (storedOptions) {
setOptions(storedOptions)
}
}
}
initialize()
}, [])
// Persist the options to chrome.storage whenever they change (Chrome extension only).
useEffect(() => {
const storeOptions = async () => {
if (CHROME_EXTENSION) {
// @ts-ignore
await chrome.storage.local.set({ microphoneOptions: options })
}
}
storeOptions()
}, [options]) // Moving this ahead of the load effect may reset what is in storage, so that is probably not OK. Needs verification.
// Start/stop buttons; the class names swap depending on isStarted.
const startButtonRow = useMemo(() => {
const onStartClicked = () => {
setIsStarted(true)
}
const onStopClicked = () => {
setIsStarted(false)
}
const startClassName = isStarted ? "body-button-active" : "body-button-stanby"
const stopClassName = isStarted ? "body-button-stanby" : "body-button-active"
return (
<div className="body-row split-3-3-4 left-padding-1">
<div className="body-item-title">Start</div>
<div className="body-button-container">
<div onClick={onStartClicked} className={startClassName}>start</div>
<div onClick={onStopClicked} className={stopClassName}>stop</div>
</div>
<div className="body-input-container">
</div>
</div>
)
}, [isStarted])
// Selector for the audio input device (real devices plus the "none" and "file" entries).
const audioInputRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">AudioInput</div>
<div className="body-select-container">
<select className="body-select" value={audioInput} onChange={(e) => { setAudioInput(e.target.value) }}>
{
inputAudioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
</div>
</div>
)
}, [inputAudioDeviceInfo, audioInput])
// Extra row shown only when the "file" input is selected: loads an audio file and
// feeds it to the voice changer through a MediaStreamDestination.
// NOTE(review): the memo depends on [audioInput, audioOutput] only, while
// onFileLoadClicked also reads `options` and `audioContext` — those may be stale; confirm.
const audioMediaInputRow = useMemo(() => {
if (audioInput != "file") {
return <></>
}
const onFileLoadClicked = async () => {
const url = await fileSelectorAsDataURL("")
const audio = document.getElementById("body-audio-converted") as HTMLAudioElement
audio.src = url
// audio.volume = 0.0
// audio.onplay = () => {
// //@ts-ignore
// const ms = audio.captureStream()
// setOptions({ ...options, audioInput: ms })
// }
await audio.play()
// Route the playing element into a media stream and use that stream as the input.
const src = audioContext!.createMediaElementSource(audio);
const dst = audioContext!.createMediaStreamDestination()
src.connect(dst)
setOptions({ ...options, audioInput: dst.stream })
// The second element plays the unconverted file on the currently selected output device.
const audio_org = document.getElementById("body-audio-original") as HTMLAudioElement
audio_org.src = url
audio_org.pause()
audio_org.onplay = () => {
console.log(audioOutputRef.current)
// @ts-ignore
audio_org.setSinkId(audioOutputRef.current)
}
}
return (
<div className="body-row split-3-3-4 left-padding-1 highlight">
<div className="body-item-title"></div>
<div className="body-item-text">
<div>
org:<audio id="body-audio-original" controls></audio>
</div>
<div>
cnv:<audio id="body-audio-converted" controls></audio>
</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onFileLoadClicked}>load</div>
</div>
</div>
)
}, [audioInput, audioOutput])
// Translate the selected input id into options.audioInput: a dummy stream for "none",
// nothing for "file" (set when a file is loaded), otherwise the device id itself.
useEffect(() => {
if (!audioContext) {
return
}
if (audioInput == "none") {
const ms = createDummyMediaStream(audioContext)
setOptions({ ...options, audioInput: ms })
} else if (audioInput == "file") {
// const audio = document.getElementById("body-audio") as HTMLAudioElement
// //@ts-ignore
// const ms = audio.captureStream()
// setOptions({ ...options, audioInput: ms })
} else {
setOptions({ ...options, audioInput: audioInput })
}
}, [audioContext, audioInput])
// Selector for the audio output device.
const audioOutputRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">AudioOutput</div>
<div className="body-select-container">
<select className="body-select" value={audioOutput} onChange={(e) => { setAudioOutput(e.target.value) }}>
{
outputAudioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
<audio hidden id="body-output-audio"></audio>
</div>
</div>
)
}, [outputAudioDeviceInfo, audioOutput])
// Change handlers: each one copies the current options/params with a single field replaced.
const onSetServerClicked = async () => {
const input = document.getElementById("mmvc-server-url") as HTMLInputElement
setOptions({ ...options, mmvcServerUrl: input.value })
}
const onProtocolChanged = async (val: Protocol) => {
setOptions({ ...options, protocol: val })
}
const onSampleRateChanged = async (val: SampleRate) => {
setOptions({ ...options, sampleRate: val })
}
const onBufferSizeChanged = async (val: BufferSize) => {
setOptions({ ...options, bufferSize: val })
}
const onChunkSizeChanged = async (val: number) => {
setOptions({ ...options, inputChunkNum: val })
}
const onSrcIdChanged = async (val: number) => {
setParams({ ...params, srcId: val })
}
const onDstIdChanged = async (val: number) => {
setParams({ ...params, dstId: val })
}
// Adds, renames or deletes (empty name) the speaker entry with the edited id.
// NOTE(review): options.speakers (and its entries) are modified in place before the
// shallow copy is stored — confirm nothing relies on the previous state object staying untouched.
const onSetSpeakerMappingClicked = async () => {
const targetId = editSpeakerTargetId
const targetName = editSpeakerTargetName
const targetSpeaker = options.speakers.find(x => { return x.id == targetId })
if (targetSpeaker) {
if (targetName.length == 0) { // Delete
const newSpeakers = options.speakers.filter(x => { return x.id != targetId })
options.speakers = newSpeakers
} else { // Update
targetSpeaker.name = targetName
}
} else {
if (targetName.length == 0) { // Noop
} else {// add
options.speakers.push({
id: targetId,
name: targetName
})
}
}
setOptions({ ...options })
}
// NOTE(review): the checkbox labelled "VF Enabled" is bound to forceVfDisable, so
// checking it sets the disable flag — looks inverted; confirm the intended meaning.
const onVfEnabledChange = async (val: boolean) => {
setOptions({ ...options, forceVfDisable: val })
}
const onVoiceChangeModeChanged = async (val: VoiceChangerMode) => {
setOptions({ ...options, voiceChangerMode: val })
}
const onGpuChanged = async (val: number) => {
setParams({ ...params, gpu: val })
}
const onCrossFadeLowerValueChanged = async (val: number) => {
setParams({ ...params, crossFadeLowerValue: val })
}
const onCrossFadeOffsetRateChanged = async (val: number) => {
setParams({ ...params, crossFadeOffsetRate: val })
}
const onCrossFadeEndRateChanged = async (val: number) => {
setParams({ ...params, crossFadeEndRate: val })
}
// The complete settings panel, assembled from the memoized rows above plus one row per option/param.
const settings = useMemo(() => {
return (
<>
<div className="body-row left-padding-1">
<div className="body-section-title">Virtual Microphone</div>
</div>
{startButtonRow}
<div className="body-row split-3-3-4 left-padding-1">
<div className="body-item-title">MMVC Server</div>
<div className="body-input-container">
<input type="text" defaultValue={options.mmvcServerUrl} id="mmvc-server-url" className="body-item-input" />
</div>
<div className="body-button-container">
<div className="body-button" onClick={onSetServerClicked}>set</div>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Protocol</div>
<div className="body-select-container">
<select className="body-select" value={options.protocol} onChange={(e) => {
onProtocolChanged(e.target.value as
Protocol)
}}>
{
Object.values(Protocol).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
{audioInputRow}
{audioMediaInputRow}
{audioOutputRow}
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Sample Rate</div>
<div className="body-select-container">
<select className="body-select" value={options.sampleRate} onChange={(e) => { onSampleRateChanged(Number(e.target.value) as SampleRate) }}>
{
Object.values(SampleRate).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Buffer Size</div>
<div className="body-select-container">
<select className="body-select" value={options.bufferSize} onChange={(e) => { onBufferSizeChanged(Number(e.target.value) as BufferSize) }}>
{
Object.values(BufferSize).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Chunk Num(128sample/chunk)</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={options.inputChunkNum} onChange={(e) => { onChunkSizeChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1 highlight">
<div className="body-item-title">VF Enabled</div>
<div>
<input type="checkbox" checked={options.forceVfDisable} onChange={(e) => onVfEnabledChange(e.target.checked)} />
</div>
<div className="body-button-container">
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Voice Change Mode</div>
<div className="body-select-container">
<select className="body-select" value={options.voiceChangerMode} onChange={(e) => { onVoiceChangeModeChanged(e.target.value as VoiceChangerMode) }}>
{
Object.values(VoiceChangerMode).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Source Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={params.srcId} onChange={(e) => { onSrcIdChanged(Number(e.target.value)) }}>
{
options.speakers.map(x => {
return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Destination Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={params.dstId} onChange={(e) => { onDstIdChanged(Number(e.target.value)) }}>
{
options.speakers.map(x => {
return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-1-2-4 left-padding-1 highlight">
<div className="body-item-title">Edit Speaker Mapping</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={editSpeakerTargetId} onChange={(e) => {
const id = Number(e.target.value)
setEditSpeakerTargetId(id)
setEditSpeakerTargetName(options.speakers.find(x => { return x.id == id })?.name || "")
}} />
</div>
<div className="body-input-container">
<input type="text" value={editSpeakerTargetName} onChange={(e) => { setEditSpeakerTargetName(e.target.value) }} />
</div>
<div className="body-button-container">
<div className="body-button" onClick={onSetSpeakerMappingClicked}>set</div>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">GPU</div>
<div className="body-input-container">
<input type="number" min={-1} max={5} step={1} value={params.gpu} onChange={(e) => { onGpuChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Cross Fade Lower Val</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={params.crossFadeLowerValue} onChange={(e) => { onCrossFadeLowerValueChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Cross Fade Offset Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={params.crossFadeOffsetRate} onChange={(e) => { onCrossFadeOffsetRateChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Cross Fade End Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={params.crossFadeEndRate} onChange={(e) => { onCrossFadeEndRateChanged(Number(e.target.value)) }} />
</div>
</div>
</>
)
}, [inputAudioDeviceInfo, outputAudioDeviceInfo, editSpeakerTargetId, editSpeakerTargetName, startButtonRow, audioInputRow, audioMediaInputRow, audioOutputRow, params, options])
return {
component: settings,
params: params,
options: options,
isStarted
}
}

View File

@ -3,7 +3,7 @@ const HtmlWebpackPlugin = require("html-webpack-plugin");
const CopyPlugin = require("copy-webpack-plugin");
module.exports = {
mode: "production",
entry: "./src/index.tsx",
entry: "./src/000_index.tsx",
resolve: {
extensions: [".ts", ".tsx", ".js"],
},

View File

@ -1,12 +1,12 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.3",
"version": "1.0.5",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.3",
"version": "1.0.5",
"license": "ISC",
"dependencies": {
"@types/readable-stream": "^2.3.15",

View File

@ -1,6 +1,6 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.3",
"version": "1.0.5",
"description": "",
"main": "dist/index.js",
"directories": {

View File

@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VolumeListener } from "./VoiceChangerWorkletNo
import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream } from "./util";
import { BufferSize, DefaultVoiceChangerOptions, DefaultVoiceChangerRequestParamas, Protocol, VoiceChangerMode, VoiceChangerRequestParamas } from "./const";
import { BufferSize, DefaultVoiceChangerOptions, DefaultVoiceChangerRequestParamas, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
import MicrophoneStream from "microphone-stream";
import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
@ -152,7 +152,10 @@ export class VoiceChnagerClient {
}
start = () => {
if (!this.micStream) { return }
if (!this.micStream) {
throw `Exception:${VOICE_CHANGER_CLIENT_EXCEPTION.ERR_MIC_STREAM_NOT_INITIALIZED}`
return
}
this.micStream.playRecording()
this._isVoiceChanging = true
}

View File

@ -13,6 +13,7 @@ export type VoiceChangerRequestParamas = {
crossFadeLowerValue: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
}
export type VoiceChangerOptions = {
@ -25,6 +26,8 @@ export type VoiceChangerOptions = {
speakers: Speaker[],
forceVfDisable: boolean,
voiceChangerMode: VoiceChangerMode,
OnnxExecutionProvider: OnnxExecutionProvider,
Framework: Framework
}
@ -63,10 +66,23 @@ export const BufferSize = {
} as const
export type BufferSize = typeof BufferSize[keyof typeof BufferSize]
/**
 * Names of the ONNX execution providers that can be selected.
 * Each key maps to the identical string value.
 */
export const OnnxExecutionProvider = {
    CPUExecutionProvider: "CPUExecutionProvider",
    CUDAExecutionProvider: "CUDAExecutionProvider",
    DmlExecutionProvider: "DmlExecutionProvider",
    OpenVINOExecutionProvider: "OpenVINOExecutionProvider",
} as const
/** Union of the provider names listed in the `OnnxExecutionProvider` object. */
export type OnnxExecutionProvider = typeof OnnxExecutionProvider[keyof typeof OnnxExecutionProvider]
/**
 * Inference frameworks that can be selected.
 *
 * Declared `as const` so that the derived `Framework` type is the literal union
 * `"PyTorch" | "ONNX"` rather than plain `string`.
 */
export const Framework = {
    "PyTorch": "PyTorch",
    "ONNX": "ONNX",
} as const
/** Union of the framework names listed in the `Framework` object. */
export type Framework = typeof Framework[keyof typeof Framework]
// Defaults
export const DefaultVoiceChangerRequestParamas: VoiceChangerRequestParamas = {
convertChunkNum: 1, //(★1)
convertChunkNum: 32, //(★1)
srcId: 107,
dstId: 100,
gpu: 0,
@ -105,14 +121,18 @@ export const DefaultVoiceChangerOptions: VoiceChangerOptions = {
}
],
forceVfDisable: false,
voiceChangerMode: "realtime"
voiceChangerMode: "realtime",
Framework: "PyTorch",
OnnxExecutionProvider: "CPUExecutionProvider"
}
export const VOICE_CHANGER_CLIENT_EXCEPTION = {
ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED",
ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE"
ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE",
ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED"
} as const
export type VOICE_CHANGER_CLIENT_EXCEPTION = typeof VOICE_CHANGER_CLIENT_EXCEPTION[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION]

View File

@ -1,3 +1,4 @@
export * from "./const"
export * from "./VoiceChangerClient"
export * from "./util"
export * from "./uploader"

107
client/lib/src/uploader.ts Normal file
View File

@ -0,0 +1,107 @@
// NOTE(review): DEBUG is hard-coded to true, so every request goes to DEBUG_BASE_URL
// and the caller-supplied baseUrl is ignored — confirm before shipping.
const DEBUG = true
const DEBUG_BASE_URL = "http://localhost:18888"

/** Size in bytes of each slice a large file is cut into. */
const UPLOAD_CHUNK_SIZE = 1024 * 1024
/** Number of chunk uploads issued concurrently before waiting for the batch to finish. */
const UPLOAD_BATCH_SIZE = 10

/** One slice of a file together with its zero-based position in the file. */
type FileChunk = {
    hash: number,
    chunk: Blob
}

/** Builds the URL of a server endpoint, honouring the DEBUG override. */
const endpointUrl = (baseUrl: string, path: string): string => {
    return DEBUG ? `${DEBUG_BASE_URL}${path}` : `${baseUrl}${path}`
}

/**
 * POSTs the given fields as multipart form data and logs the response body.
 *
 * @throws Error when the server answers with a non-2xx status; network errors
 *         raised by `fetch` propagate unchanged.
 */
const postForm = async (url: string, fields: ReadonlyArray<readonly [string, string | Blob]>): Promise<void> => {
    const formData = new FormData();
    for (const [key, value] of fields) {
        formData.append(key, value);
    }
    const request = new Request(url, {
        method: 'POST',
        body: formData,
    });
    const response = await fetch(request)
    const text = await response.text()
    console.log(text)
    if (!response.ok) {
        throw new Error(`POST ${url} failed with status ${response.status}: ${text}`)
    }
}

/**
 * Uploads a file in chunks of UPLOAD_CHUNK_SIZE bytes, UPLOAD_BATCH_SIZE chunks at a time.
 * Each chunk is sent under the name `<file name>_<chunk index>`.
 *
 * @param baseUrl    server base URL (ignored while DEBUG is true)
 * @param file       file to upload
 * @param onprogress called with a percentage after every batch except the last one;
 *                   the percentage is computed against `chunkNum + 1` so that it stays below 100
 * @returns the number of chunks the file was split into
 * @throws Error when any chunk upload fails
 */
const uploadLargeFile = async (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
    const uploadURL = endpointUrl(baseUrl, "/upload_file")
    onprogress(0, false)

    const fileChunks: FileChunk[] = [];
    for (let cur = 0, index = 0; cur < file.size; cur += UPLOAD_CHUNK_SIZE, index++) {
        fileChunks.push({
            hash: index,
            chunk: file.slice(cur, cur + UPLOAD_CHUNK_SIZE),
        });
    }
    const chunkNum = fileChunks.length
    console.log("FILE_CHUNKS:", chunkNum, fileChunks)

    for (let start = 0; start < chunkNum; start += UPLOAD_BATCH_SIZE) {
        const batch = fileChunks.slice(start, start + UPLOAD_BATCH_SIZE)
        // A failing chunk rejects the whole upload instead of leaving it pending forever.
        await Promise.all(batch.map(chunk => {
            return postForm(uploadURL, [
                ["file", chunk.chunk],
                ["filename", `${file.name}_${chunk.hash}`],
            ])
        }))
        const uploaded = Math.min(start + UPLOAD_BATCH_SIZE, chunkNum)
        if (uploaded < chunkNum) {
            onprogress(Math.floor((uploaded / (chunkNum + 1)) * 100), false)
        }
    }
    return chunkNum
}

/**
 * Uploads a model file (chunked) and its config file, then asks the server to load the model.
 *
 * @param baseUrl    server base URL (ignored while DEBUG is true)
 * @param modelFile  model file, uploaded in chunks
 * @param configFile config file, uploaded in a single request
 * @param onprogress called with `(percentage, false)` while uploading and `(100, true)` once the model is loaded
 * @throws Error when any request fails; `onprogress(100, true)` is not called in that case
 */
export const uploadModelProps = async (baseUrl: string, modelFile: File, configFile: File, onprogress: (progress: number, end: boolean) => void) => {
    const uploadURL = endpointUrl(baseUrl, "/upload_file")
    const loadModelURL = endpointUrl(baseUrl, "/load_model")
    onprogress(0, false)

    const chunkNum = await uploadLargeFile(baseUrl, modelFile, (progress: number, _end: boolean) => {
        onprogress(progress, false)
    })
    console.log("model uploaded")

    await postForm(uploadURL, [
        ["file", configFile],
        ["filename", configFile.name],
    ])
    console.log("config uploaded")

    await postForm(loadModelURL, [
        ["modelFilename", modelFile.name],
        ["modelFilenameChunkNum", "" + chunkNum],
        ["configFilename", configFile.name],
    ])
    onprogress(100, true)
    console.log("model loaded")
}

View File

@ -55,3 +55,5 @@ export const fileSelectorAsDataURL = async (regex: string) => {
})
return url
}

35
docker_onnx/Dockerfile Normal file
View File

@ -0,0 +1,35 @@
# Image with PyTorch, the ONNX tool chain and MMVC_Trainer checked out
# (built as "onnx-converter" by the build:docker:onnx script).
FROM debian:bullseye-slim
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get install -y \
    python3-pip \
    git \
    wget \
    emacs \
    protobuf-compiler \
    cmake \
    libsndfile1-dev

RUN pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113

# One package per "pip install"; the chain must not end with a line continuation,
# otherwise the next instruction is swallowed into this command.
RUN pip install Cython==0.29.32 \
    && pip install librosa==0.9.2 \
    && pip install Unidecode==1.3.4 \
    && pip install phonemizer==3.2.1 \
    && pip install onnx \
    && pip install onnxsim \
    && pip install onnxruntime \
    && pip install onnx-simplifier

RUN pip install --upgrade protobuf

RUN git clone https://github.com/isletennos/MMVC_Trainer.git -b v1.3.2.1

# Build the monotonic_align Cython extension in place and move it into its package directory.
WORKDIR /MMVC_Trainer/monotonic_align
RUN cythonize -3 -i core.pyx \
    && mv core.cpython-39-x86_64-linux-gnu.so monotonic_align/

WORKDIR /MMVC_Trainer

View File

@ -5,12 +5,12 @@
"main": "index.js",
"scripts": {
"build:docker": "date +%Y%m%d%H%M%S > docker/dummy && DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile docker/ -t voice-changer",
"build:docker_arm64": "date +%Y%m%d%H%M%S > docker/dummy && DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile_arm64 docker/ -t voice-changer-arm64",
"build:docker:onnx": "DOCKER_BUILDKIT=1 docker build -f docker_onnx/Dockerfile docker/ -t onnx-converter",
"copy:frontend":"docker run -v `pwd`/frontend/dist:/frontend/dist --entrypoint /bin/bash -ti voice-changer -c \"cp -r /voice-changer-internal/frontend/dist/* /frontend/dist\"",
"copy:backend":"docker run -v `pwd`/demo:/demo --entrypoint /bin/bash -ti voice-changer -c \"cp -r /voice-changer-internal/voice-change-service/* /demo/ && rm -rf /demo/MMVC_Trainer/.git && rm -rf /demo/MMVC_Trainer/.gitignore \"",
"create:demo":"run-p copy:frontend copy:backend",
"push:docker": "bash script/001_pushDocker.sh",
"push:docker_arm64": "bash script/002_pushDocker_arm64.sh",
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {

View File

@ -35,6 +35,7 @@ def setupArgParser():
parser.add_argument("-p", type=int, default=8080, help="port")
parser.add_argument("-c", type=str, help="path for the config.json")
parser.add_argument("-m", type=str, help="path for the model file")
parser.add_argument("-o", type=str, help="path for the onnx model file")
parser.add_argument("--https", type=strtobool,
default=False, help="use https")
parser.add_argument("--httpsKey", type=str,
@ -75,6 +76,7 @@ if __name__ == thisFilename or args.colab == True:
PORT = args.p
CONFIG = args.c
MODEL = args.m
ONNX_MODEL = args.o if args.o != None else None
# if os.getenv("EX_TB_PORT"):
# EX_TB_PORT = os.environ["EX_TB_PORT"]
@ -82,7 +84,7 @@ if __name__ == thisFilename or args.colab == True:
voiceChangerManager = VoiceChangerManager.get_instance()
if CONFIG and MODEL:
voiceChangerManager.loadModel(CONFIG, MODEL)
voiceChangerManager.loadModel(CONFIG, MODEL, ONNX_MODEL)
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager)
app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager)
@ -96,13 +98,13 @@ if __name__ == '__main__':
PORT = args.p
CONFIG = args.c
MODEL = args.m
ONNX_MODEL = args.o if args.o != None else None
if TYPE != "MMVC" and TYPE != "TRAIN":
print("Type(-t) should be MMVC or TRAIN")
exit(1)
printMessage(f"Start MMVC SocketIO Server", level=0)
printMessage(f"CONFIG:{CONFIG}, MODEL:{MODEL}", level=1)
printMessage(f"CONFIG:{CONFIG}, MODEL:{MODEL} ONNX_MODEL:{ONNX_MODEL}", level=1)
if args.colab == False:
if os.getenv("EX_PORT"):

View File

@ -17,3 +17,4 @@ tqdm==4.64.1
Unidecode==1.3.6
uvicorn==0.20.0
websockets==10.4
onnxruntime==1.13.1

View File

@ -7,7 +7,6 @@ from const import frontend_path
class MMVC_SocketIOApp():
@classmethod
def get_instance(cls, app_fastapi, voiceChangerManager:VoiceChangerManager):
print("INDEX:::", f'${frontend_path}/index.html')
if not hasattr(cls, "_instance"):
sio = MMVC_SocketIOServer.get_instance(voiceChangerManager)
app_socketio = socketio.ASGIApp(

View File

@ -13,11 +13,14 @@ from data_utils import TextAudioSpeakerLoader, TextAudioSpeakerCollate
from mel_processing import spectrogram_torch
from text import text_to_sequence, cleaned_text_to_sequence
import onnxruntime
# providers = ['OpenVINOExecutionProvider',"CUDAExecutionProvider","DmlExecutionProvider", "CPUExecutionProvider"]
providers = ['OpenVINOExecutionProvider',"CUDAExecutionProvider","DmlExecutionProvider"]
class VoiceChanger():
def __init__(self, config, model):
# def __init__(self, config, model, onnx_model=None, providers=["CPUExecutionProvider"]):
def __init__(self, config, model, onnx_model=None):
self.hps = utils.get_hparams_from_file(config)
self.net_g = SynthesizerTrn(
len(symbols),
@ -43,8 +46,23 @@ class VoiceChanger():
self.crossFadeEndRate = 0
self.unpackedData_length = 0
if onnx_model != None:
ort_options = onnxruntime.SessionOptions()
ort_options.intra_op_num_threads = 8
# ort_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
# ort_options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
# ort_options.inter_op_num_threads = 8
self.onnx_session = onnxruntime.InferenceSession(
onnx_model,
# sess_options=ort_options,
providers=providers,
)
print("ONNX_MDEOL!1", self.onnx_session.get_providers())
def destroy(self):
del self.net_g
del self.onnx_session
@ -57,6 +75,8 @@ class VoiceChanger():
# print(f"Convert sample_num = {128 * convertChunkNum} (128 * {convertChunkNum}) is less than input sample_num x2 ({unpackedData.shape[0]}) x2. Chage to {unpackedData.shape[0] * 2} samples")
convertSize = unpackedData.shape[0] * 2
print("convert Size", convertChunkNum, convertSize)
if self.crossFadeOffsetRate != crossFadeOffsetRate or self.crossFadeEndRate != crossFadeEndRate or self.unpackedData_length != unpackedData.shape[0]:
@ -71,11 +91,11 @@ class VoiceChanger():
np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
np_cur_strength = np.cos((1-percent) * 0.5 * np.pi) ** 2
np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(unpackedData.shape[0]-cf_offset-len(np_prev_strength))])
np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(unpackedData.shape[0]-cf_offset-len(np_cur_strength))])
self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(unpackedData.shape[0]-cf_offset-len(np_prev_strength))])
self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(unpackedData.shape[0]-cf_offset-len(np_cur_strength))])
self.prev_strength = torch.FloatTensor(np_prev_strength)
self.cur_strength = torch.FloatTensor(np_cur_strength)
self.prev_strength = torch.FloatTensor(self.np_prev_strength)
self.cur_strength = torch.FloatTensor(self.np_cur_strength)
torch.set_printoptions(edgeitems=2100)
print("Generated Strengths")
@ -108,7 +128,32 @@ class VoiceChanger():
data = TextAudioSpeakerCollate()([data])
# if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled):
if gpu < 0 or self.gpu_num == 0:
if gpu == -2 and hasattr(self, 'onnx_session') == True:
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
sid_tgt1 = torch.LongTensor([dstId])
# if spec.size()[2] >= 8:
audio1 = self.onnx_session.run(
["audio"],
{
"specs": spec.numpy(),
"lengths": spec_lengths.numpy(),
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy()
})[0][0,0] * self.hps.data.max_wav_value
if hasattr(self, 'np_prev_audio1') == True:
prev = self.np_prev_audio1[-1*unpackedData.shape[0]:]
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
# print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape)
powered_prev = prev * self.np_prev_strength
powered_cur = cur * self.np_cur_strength
result = powered_prev + powered_cur
#result = prev * self.np_prev_strength + cur * self.np_cur_strength
else:
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = cur
self.np_prev_audio1 = audio1
elif gpu < 0 or self.gpu_num == 0:
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
x.cpu() for x in data]
@ -173,69 +218,10 @@ class VoiceChanger():
except Exception as e:
print("VC PROCESSING!!!! EXCEPTION!!!", e)
print(traceback.format_exc())
del self.np_prev_audio1
del self.prev_audio1
result = result.astype(np.int16)
# print("on_request result size:",result.shape)
return result
def on_request_old(self, gpu, srcId, dstId, timestamp, prefixChunkSize, wav):
unpackedData = wav
convertSize = unpackedData.shape[0] + (prefixChunkSize * 512)
try:
audio = torch.FloatTensor(unpackedData.astype(np.float32))
audio_norm = audio / self.hps.data.max_wav_value
audio_norm = audio_norm.unsqueeze(0)
self.audio_buffer = torch.cat(
[self.audio_buffer, audio_norm], axis=1)
audio_norm = self.audio_buffer[:, -convertSize:]
self.audio_buffer = audio_norm
spec = spectrogram_torch(audio_norm, self.hps.data.filter_length,
self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
center=False)
spec = torch.squeeze(spec, 0)
sid = torch.LongTensor([int(srcId)])
data = (self.text_norm, spec, audio_norm, sid)
data = TextAudioSpeakerCollate()([data])
# if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled):
if gpu < 0 or self.gpu_num == 0:
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
x.cpu() for x in data]
sid_tgt1 = torch.LongTensor([dstId]).cpu()
audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
# elif self.mps_enabled == True: # MPS doesnt support aten::weight_norm_interface, and PYTORCH_ENABLE_MPS_FALLBACK=1 cause a big dely.
# x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
# x.to("mps") for x in data]
# sid_tgt1 = torch.LongTensor([dstId]).to("mps")
# audio1 = (self.net_g.to("mps").voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
# 0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
else:
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
x.cuda(gpu) for x in data]
sid_tgt1 = torch.LongTensor([dstId]).cuda(gpu)
audio1 = (self.net_g.cuda(gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
# if len(self.prev_audio) > unpackedData.shape[0]:
# prevLastFragment = self.prev_audio[-unpackedData.shape[0]:]
# curSecondLastFragment = audio1[-unpackedData.shape[0]*2:-unpackedData.shape[0]]
# print("prev, cur", prevLastFragment.shape, curSecondLastFragment.shape)
# self.prev_audio = audio1
# print("self.prev_audio", self.prev_audio.shape)
audio1 = audio1[-unpackedData.shape[0]*2:]
except Exception as e:
print("VC PROCESSING!!!! EXCEPTION!!!", e)
print(traceback.format_exc())
audio1 = audio1.astype(np.int16)
return audio1

View File

@ -8,10 +8,10 @@ class VoiceChangerManager():
cls._instance = cls()
return cls._instance
def loadModel(self, config, model):
def loadModel(self, config, model, onnx_model):
if hasattr(self, 'voiceChanger') == True:
self.voiceChanger.destroy()
self.voiceChanger = VoiceChanger(config, model)
self.voiceChanger = VoiceChanger(config, model, onnx_model)
def changeVoice(self, gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData):
if hasattr(self, 'voiceChanger') == True: