mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 13:35:12 +03:00
add quality control
This commit is contained in:
parent
852b4216ca
commit
f73178b9b0
23
client/demo/dist/index.js
vendored
23
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -8,6 +8,7 @@ import { useAdvancedSetting } from "./105_advanced_setting";
|
||||
import { useSpeakerSetting } from "./103_speaker_setting";
|
||||
import { useServerControl } from "./106_server_control";
|
||||
import { useClient } from "@dannadori/voice-changer-client-js";
|
||||
import { useQualityControl } from "./107_qulity_control";
|
||||
|
||||
export const useMicrophoneOptions = () => {
|
||||
const [audioContext, setAudioContext] = useState<AudioContext | null>(null)
|
||||
@ -23,6 +24,7 @@ export const useMicrophoneOptions = () => {
|
||||
const convertSetting = useConvertSetting({ clientState })
|
||||
const advancedSetting = useAdvancedSetting({ clientState })
|
||||
const serverControl = useServerControl({ clientState })
|
||||
const qualityControl = useQualityControl({ clientState })
|
||||
|
||||
const clearSetting = async () => {
|
||||
await clientState.clearSetting()
|
||||
@ -51,6 +53,7 @@ export const useMicrophoneOptions = () => {
|
||||
{serverControl.serverControl}
|
||||
{serverSetting.serverSetting}
|
||||
{deviceSetting.deviceSetting}
|
||||
{qualityControl.qualityControl}
|
||||
{speakerSetting.speakerSetting}
|
||||
{convertSetting.convertSetting}
|
||||
{advancedSetting.advancedSetting}
|
||||
@ -61,7 +64,8 @@ export const useMicrophoneOptions = () => {
|
||||
deviceSetting.deviceSetting,
|
||||
speakerSetting.speakerSetting,
|
||||
convertSetting.convertSetting,
|
||||
advancedSetting.advancedSetting])
|
||||
advancedSetting.advancedSetting,
|
||||
qualityControl.qualityControl])
|
||||
|
||||
return {
|
||||
voiceChangerSetting,
|
||||
|
@ -129,14 +129,13 @@ export const useSpeakerSetting = (props: UseSpeakerSettingProps) => {
|
||||
<div className="body-row split-3-2-1-4 left-padding-1 guided">
|
||||
<div className="body-item-title left-padding-1">F0 Factor</div>
|
||||
<div className="body-input-container">
|
||||
<input type="range" className="body-item-input" min="0.1" max="5.0" step="0.1" value={props.clientState.serverSetting.setting.f0Factor} onChange={(e) => {
|
||||
<input type="range" className="body-item-input-slider" min="0.1" max="5.0" step="0.1" value={props.clientState.serverSetting.setting.f0Factor} onChange={(e) => {
|
||||
props.clientState.serverSetting.setF0Factor(Number(e.target.value))
|
||||
}}></input>
|
||||
<span className="body-item-input-slider-val">{props.clientState.serverSetting.setting.f0Factor}</span>
|
||||
</div>
|
||||
<div className="body-item-text">
|
||||
<div>{props.clientState.serverSetting.setting.f0Factor}</div>
|
||||
</div>
|
||||
<div className="body-item-text">recommended:{recommendedF0Factor.toFixed(1)}</div>
|
||||
<div className="body-item-text"></div>
|
||||
<div className="body-item-text">recommend: {recommendedF0Factor.toFixed(1)}</div>
|
||||
</div>
|
||||
)
|
||||
}, [props.clientState.serverSetting.setting.f0Factor, props.clientState.serverSetting.setting.srcId, props.clientState.serverSetting.setting.dstId, props.clientState.clientSetting.setting.correspondences, props.clientState.serverSetting.setF0Factor])
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
||||
import { BufferSize, DownSamplingMode, F0Detector, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
||||
import React, { useMemo, useState } from "react"
|
||||
import { ClientState } from "@dannadori/voice-changer-client-js";
|
||||
|
||||
@ -162,21 +162,6 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
|
||||
}, [props.clientState.serverSetting.setting.crossFadeEndRate, props.clientState.serverSetting.setCrossFadeEndRate])
|
||||
|
||||
|
||||
const vfForceDisableRow = useMemo(() => {
|
||||
return (
|
||||
<div className="body-row split-3-3-4 left-padding-1 guided">
|
||||
<div className="body-item-title left-padding-1 ">VF Disabled</div>
|
||||
<div>
|
||||
<input type="checkbox" checked={props.clientState.clientSetting.setting.forceVfDisable} onChange={(e) => {
|
||||
props.clientState.clientSetting.setVfForceDisabled(e.target.checked)
|
||||
}} />
|
||||
</div>
|
||||
<div className="body-button-container">
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}, [props.clientState.clientSetting.setting.forceVfDisable, props.clientState.clientSetting.setVfForceDisabled])
|
||||
|
||||
const voiceChangeModeRow = useMemo(() => {
|
||||
return (
|
||||
<div className="body-row split-3-7 left-padding-1 guided">
|
||||
@ -217,7 +202,6 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
|
||||
}, [props.clientState.clientSetting.setting.downSamplingMode, props.clientState.clientSetting.setDownSamplingMode])
|
||||
|
||||
|
||||
|
||||
const workletSettingRow = useMemo(() => {
|
||||
return (
|
||||
<>
|
||||
@ -280,15 +264,15 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
|
||||
{crossFadeOffsetRateRow}
|
||||
{crossFadeEndRateRow}
|
||||
<div className="body-row divider"></div>
|
||||
{vfForceDisableRow}
|
||||
{voiceChangeModeRow}
|
||||
<div className="body-row divider"></div>
|
||||
{workletSettingRow}
|
||||
<div className="body-row divider"></div>
|
||||
{downSamplingModeRow}
|
||||
|
||||
</>
|
||||
)
|
||||
}, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
|
||||
}, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
|
||||
|
||||
|
||||
const advancedSetting = useMemo(() => {
|
||||
|
@ -30,31 +30,13 @@ export const useServerControl = (props: UseServerControlProps) => {
|
||||
<div onClick={onStopClicked} className={stopClassName}>stop</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="body-input-container split-4-4-2">
|
||||
<div>gain(in)</div>
|
||||
<input type="range" className="body-item-input" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.inputGain} onChange={(e) => {
|
||||
props.clientState.clientSetting.setInputGain(Number(e.target.value))
|
||||
}}></input>
|
||||
<div>{props.clientState.clientSetting.setting.inputGain}</div>
|
||||
</div>
|
||||
<div className="body-input-container split-4-4-2">
|
||||
<div>gain(out)</div>
|
||||
<input type="range" className="body-item-input" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.outputGain} onChange={(e) => {
|
||||
props.clientState.clientSetting.setOutputGain(Number(e.target.value))
|
||||
}}></input>
|
||||
<div>{props.clientState.clientSetting.setting.outputGain}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="body-input-container">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
)
|
||||
}, [isStarted, props.clientState.clientSetting.start, props.clientState.clientSetting.stop,
|
||||
props.clientState.clientSetting.setInputGain, props.clientState.clientSetting.setting.inputGain,
|
||||
props.clientState.clientSetting.setOutputGain, props.clientState.clientSetting.setting.outputGain
|
||||
])
|
||||
}, [isStarted, props.clientState.clientSetting.start, props.clientState.clientSetting.stop])
|
||||
|
||||
const performanceRow = useMemo(() => {
|
||||
return (
|
||||
|
150
client/demo/src/107_qulity_control.tsx
Normal file
150
client/demo/src/107_qulity_control.tsx
Normal file
@ -0,0 +1,150 @@
|
||||
import { BufferSize, DownSamplingMode, F0Detector, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
||||
import React, { useMemo, useState } from "react"
|
||||
import { ClientState } from "@dannadori/voice-changer-client-js";
|
||||
|
||||
|
||||
/**
 * Input of {@link useQualityControl}.
 *
 * `clientState` is the voice-changer client state whose client-side and
 * server-side settings (and their setters) the quality-control rows read
 * and update.
 */
export type UseQualityControlProps = {
    clientState: ClientState;
};
|
||||
|
||||
/**
 * Result of {@link useQualityControl}.
 *
 * `qualityControl` is the ready-to-render element for the whole
 * "Quality Control" section (header row plus, when expanded, its rows).
 */
export type QualityControlState = {
    qualityControl: JSX.Element
};
|
||||
|
||||
/**
 * Hook that builds the collapsible "Quality Control" section of the demo UI.
 *
 * The section consists of a header row with a "show" checkbox and, when that
 * checkbox is ticked, four rows:
 *  - noise suppression (echo cancel / suppression1 / suppression2 checkboxes),
 *  - input and output gain sliders,
 *  - F0 detector selector,
 *  - recordIO selector.
 *
 * Every row is memoized on exactly the setting values and setters it reads.
 *
 * @param props - carries the `clientState` whose settings are displayed and updated.
 * @returns an object whose `qualityControl` member is the element to render.
 */
export const useQualityControl = (props: UseQualityControlProps): QualityControlState => {
    const [showQualityControl, setShowQualityControl] = useState<boolean>(false)
    const { clientSetting, serverSetting } = props.clientState

    // Three checkboxes bound to the client-side capture options.
    const noiseControlRow = useMemo(() => {
        return (
            <div className="body-row split-3-2-2-2-1 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">Noise Suppression</div>
                <div>
                    <input type="checkbox" checked={clientSetting.setting.echoCancel} onChange={(e) => {
                        clientSetting.setEchoCancel(e.target.checked)
                    }} /> echo cancel
                </div>
                <div>
                    <input type="checkbox" checked={clientSetting.setting.noiseSuppression} onChange={(e) => {
                        clientSetting.setNoiseSuppression(e.target.checked)
                    }} /> suppression1
                </div>
                <div>
                    <input type="checkbox" checked={clientSetting.setting.noiseSuppression2} onChange={(e) => {
                        clientSetting.setNoiseSuppression2(e.target.checked)
                    }} /> suppression2
                </div>
                <div className="body-button-container">
                </div>
            </div>
        )
    }, [
        clientSetting.setting.echoCancel, clientSetting.setEchoCancel,
        clientSetting.setting.noiseSuppression, clientSetting.setNoiseSuppression,
        clientSetting.setting.noiseSuppression2, clientSetting.setNoiseSuppression2,
    ])

    // Input/output gain sliders (0.0 - 1.0 in steps of 0.1) with the current value shown next to each.
    const gainControlRow = useMemo(() => {
        return (
            <div className="body-row split-3-2-2-3 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">Gain Control</div>
                <div>
                    <span className="body-item-input-slider-label">in</span>
                    <input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={clientSetting.setting.inputGain} onChange={(e) => {
                        clientSetting.setInputGain(Number(e.target.value))
                    }}></input>
                    <span className="body-item-input-slider-val">{clientSetting.setting.inputGain}</span>
                </div>
                <div>
                    <span className="body-item-input-slider-label">out</span>
                    <input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={clientSetting.setting.outputGain} onChange={(e) => {
                        clientSetting.setOutputGain(Number(e.target.value))
                    }}></input>
                    <span className="body-item-input-slider-val">{clientSetting.setting.outputGain}</span>
                </div>
                <div className="body-button-container">
                </div>
            </div>
        )
    }, [
        // The setter must be listed too: the previous list repeated the value
        // and omitted setInputGain, so the row could keep calling a stale setter.
        clientSetting.setting.inputGain, clientSetting.setInputGain,
        clientSetting.setting.outputGain, clientSetting.setOutputGain,
    ])

    // Drop-down listing every F0Detector value with a short description.
    const f0DetectorRow = useMemo(() => {
        // Typed as a string-keyed table so the lookup below needs no ts-ignore.
        const desc: Record<string, string> = { "harvest": "High Quality", "dio": "Light Weight" }
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">F0 Detector</div>
                <div className="body-select-container">
                    <select className="body-select" value={serverSetting.setting.f0Detector} onChange={(e) => {
                        serverSetting.setF0Detector(e.target.value as F0Detector)
                    }}>
                        {
                            Object.values(F0Detector).map(x => {
                                return <option key={x} value={x}>{x}({desc[x]})</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [serverSetting.setting.f0Detector, serverSetting.setF0Detector])

    // Drop-down for the server-side recordIO setting; offers the values 0 and 1.
    const recordIORow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">recordIO</div>
                <div className="body-select-container">
                    <select className="body-select" value={serverSetting.setting.recordIO} onChange={(e) => {
                        serverSetting.setRecordIO(Number(e.target.value))
                    }}>
                        {
                            [0, 1].map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [serverSetting.setting.recordIO, serverSetting.setRecordIO])

    // The rows are rendered only while the "show" checkbox is ticked.
    const qualityControlContent = useMemo(() => {
        if (!showQualityControl) return <></>
        return (
            <>
                {noiseControlRow}
                {gainControlRow}
                {f0DetectorRow}
                {recordIORow}
            </>
        )
    }, [showQualityControl, gainControlRow, noiseControlRow, f0DetectorRow, recordIORow])

    // Header row with the show/hide toggle, followed by the (possibly empty) content.
    const qualityControl = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-7 left-padding-1">
                    <div className="body-sub-section-title">Quality Control</div>
                    <div>
                        <input type="checkbox" checked={showQualityControl} onChange={(e) => {
                            setShowQualityControl(e.target.checked)
                        }} /> show
                    </div>
                </div>
                {qualityControlContent}
            </>
        )
    }, [showQualityControl, qualityControlContent])

    return {
        qualityControl,
    }
}
|
||||
|
||||
|
@ -388,7 +388,16 @@ body {
|
||||
display: flex;
|
||||
}
|
||||
.body-item-input {
|
||||
width: 90%;
|
||||
width: 60%;
|
||||
}
|
||||
.body-item-input-slider {
|
||||
width: 60%;
|
||||
}
|
||||
.body-item-input-slider-label {
|
||||
margin-right: 1rem;
|
||||
}
|
||||
.body-item-input-slider-val {
|
||||
margin-left: 1rem;
|
||||
}
|
||||
|
||||
.body-button-container {
|
||||
|
@ -115,8 +115,10 @@ export class VoiceChangerClient {
|
||||
}
|
||||
|
||||
// forceVfDisable is for the condition that vf is enabled in constructor.
|
||||
setup = async (input: string | MediaStream, bufferSize: BufferSize, forceVfDisable: boolean = false) => {
|
||||
//noiseSuppression2 => VoiceFocus
|
||||
setup = async (input: string | MediaStream, bufferSize: BufferSize, echoCancel: boolean = true, noiseSuppression: boolean = true, noiseSuppression2: boolean = false) => {
|
||||
const lockNum = await this.lock()
|
||||
console.log(`Input Setup=> echo: ${echoCancel}, noise1: ${noiseSuppression}, noise2: ${noiseSuppression2}`)
|
||||
// condition check
|
||||
if (!this.vcNode) {
|
||||
console.warn("vc node is not initialized.")
|
||||
@ -136,10 +138,16 @@ export class VoiceChangerClient {
|
||||
channelCount: 1,
|
||||
sampleRate: 48000,
|
||||
sampleSize: 16,
|
||||
// echoCancellation: false,
|
||||
// noiseSuppression: false
|
||||
autoGainControl: false,
|
||||
echoCancellation: echoCancel,
|
||||
noiseSuppression: noiseSuppression
|
||||
}
|
||||
})
|
||||
// this.currentMediaStream.getAudioTracks().forEach((x) => {
|
||||
// console.log("MIC Setting(cap)", x.getCapabilities())
|
||||
// console.log("MIC Setting(const)", x.getConstraints())
|
||||
// console.log("MIC Setting(setting)", x.getSettings())
|
||||
// })
|
||||
} else {
|
||||
this.currentMediaStream = input
|
||||
}
|
||||
@ -160,14 +168,13 @@ export class VoiceChangerClient {
|
||||
this.inputGainNode = this.ctx.createGain()
|
||||
this.inputGainNode.gain.value = this.inputGain
|
||||
this.currentMediaStreamAudioSourceNode.connect(this.inputGainNode)
|
||||
if (this.currentDevice && forceVfDisable == false) {
|
||||
if (this.currentDevice && noiseSuppression2) {
|
||||
this.currentDevice.chooseNewInnerDevice(this.currentMediaStream)
|
||||
const voiceFocusNode = await this.currentDevice.createAudioNode(this.ctx); // vf node
|
||||
this.inputGainNode.connect(voiceFocusNode.start) // input node -> vf node
|
||||
voiceFocusNode.end.connect(this.outputNodeFromVF!)
|
||||
this.micStream.setStream(this.outputNodeFromVF!.stream) // vf node -> mic stream
|
||||
} else {
|
||||
console.log("VF disabled")
|
||||
const inputDestinationNodeForMicStream = this.ctx.createMediaStreamDestination()
|
||||
this.inputGainNode.connect(inputDestinationNodeForMicStream)
|
||||
this.micStream.setStream(inputDestinationNodeForMicStream.stream) // input device -> mic stream
|
||||
|
@ -23,6 +23,8 @@ export type VoiceChangerServerSetting = {
|
||||
onnxExecutionProvider: OnnxExecutionProvider,
|
||||
|
||||
f0Factor: number
|
||||
f0Detector: string // dio or harvest
|
||||
recordIO: number // 0:off, 1:on
|
||||
}
|
||||
|
||||
export type VoiceChangerClientSetting = {
|
||||
@ -34,7 +36,9 @@ export type VoiceChangerClientSetting = {
|
||||
inputChunkNum: number, // n of (256 x n) for send buffer
|
||||
speakers: Speaker[],
|
||||
correspondences: Correspondence[],
|
||||
forceVfDisable: boolean,
|
||||
echoCancel: boolean,
|
||||
noiseSuppression: boolean,
|
||||
noiseSuppression2: boolean,
|
||||
voiceChangerMode: VoiceChangerMode,
|
||||
downSamplingMode: DownSamplingMode,
|
||||
|
||||
@ -75,6 +79,8 @@ export type ServerInfo = {
|
||||
framework: Framework,
|
||||
onnxExecutionProvider: string[]
|
||||
f0Factor: number
|
||||
f0Detector: string
|
||||
recordIO: number
|
||||
}
|
||||
|
||||
|
||||
@ -130,6 +136,12 @@ export const Framework = {
|
||||
}
|
||||
export type Framework = typeof Framework[keyof typeof Framework]
|
||||
|
||||
/**
 * F0 (pitch) detectors that can be selected for the server.
 *
 * Declared `as const` so that the derived `F0Detector` type below is the
 * literal union `"dio" | "harvest"` instead of widening to `string`.
 */
export const F0Detector = {
    "dio": "dio",
    "harvest": "harvest",
} as const
/** Union of the values of the `F0Detector` table. */
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
|
||||
|
||||
export const ServerSettingKey = {
|
||||
"srcId": "srcId",
|
||||
"dstId": "dstId",
|
||||
@ -141,7 +153,9 @@ export const ServerSettingKey = {
|
||||
"crossFadeOverlapRate": "crossFadeOverlapRate",
|
||||
"framework": "framework",
|
||||
"onnxExecutionProvider": "onnxExecutionProvider",
|
||||
"f0Factor": "f0Factor"
|
||||
"f0Factor": "f0Factor",
|
||||
"f0Detector": "f0Detector",
|
||||
"recordIO": "recordIO"
|
||||
} as const
|
||||
export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
|
||||
|
||||
@ -158,8 +172,9 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
|
||||
crossFadeOverlapRate: 0.5,
|
||||
framework: "PyTorch",
|
||||
f0Factor: 1.0,
|
||||
onnxExecutionProvider: "CPUExecutionProvider"
|
||||
|
||||
onnxExecutionProvider: "CPUExecutionProvider",
|
||||
f0Detector: "dio",
|
||||
recordIO: 0
|
||||
}
|
||||
|
||||
export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
|
||||
@ -192,7 +207,9 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
|
||||
}
|
||||
],
|
||||
correspondences: [],
|
||||
forceVfDisable: false,
|
||||
echoCancel: true,
|
||||
noiseSuppression: true,
|
||||
noiseSuppression2: false,
|
||||
voiceChangerMode: "realtime",
|
||||
downSamplingMode: "average",
|
||||
inputGain: 1.0,
|
||||
|
@ -17,7 +17,9 @@ export type ClientSettingState = {
|
||||
setProtocol: (proto: Protocol) => void;
|
||||
setAudioInput: (audioInput: string | MediaStream | null) => Promise<void>
|
||||
setBufferSize: (bufferSize: BufferSize) => Promise<void>
|
||||
setVfForceDisabled: (vfForceDisabled: boolean) => Promise<void>
|
||||
setEchoCancel: (voiceFocus: boolean) => Promise<void>
|
||||
setNoiseSuppression: (voiceFocus: boolean) => Promise<void>
|
||||
setNoiseSuppression2: (voiceFocus: boolean) => Promise<void>
|
||||
setInputChunkNum: (num: number) => void;
|
||||
setVoiceChangerMode: (mode: VoiceChangerMode) => void
|
||||
setDownSamplingMode: (mode: DownSamplingMode) => void
|
||||
@ -110,15 +112,13 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
|
||||
|
||||
const _setInput = async () => {
|
||||
if (!props.voiceChangerClient) return
|
||||
// console.log("[useClient] setup!(0)", settingRef.current.audioInput)
|
||||
if (!settingRef.current.audioInput || settingRef.current.audioInput == "none") {
|
||||
// console.log("[useClient] setup!(1)", settingRef.current.audioInput)
|
||||
const ms = createDummyMediaStream(props.audioContext!)
|
||||
await props.voiceChangerClient.setup(ms, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
|
||||
|
||||
await props.voiceChangerClient.setup(ms, settingRef.current.bufferSize, settingRef.current.echoCancel, settingRef.current.noiseSuppression, settingRef.current.noiseSuppression2)
|
||||
} else {
|
||||
// console.log("[useClient] setup!(2)", settingRef.current.audioInput)
|
||||
await props.voiceChangerClient.setup(settingRef.current.audioInput, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
|
||||
await props.voiceChangerClient.setup(settingRef.current.audioInput, settingRef.current.bufferSize, settingRef.current.echoCancel, settingRef.current.noiseSuppression, settingRef.current.noiseSuppression2)
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,10 +140,28 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
|
||||
const setVfForceDisabled = useMemo(() => {
|
||||
return async (vfForceDisabled: boolean) => {
|
||||
const setEchoCancel = useMemo(() => {
|
||||
return async (val: boolean) => {
|
||||
if (!props.voiceChangerClient) return
|
||||
settingRef.current.forceVfDisable = vfForceDisabled
|
||||
settingRef.current.echoCancel = val
|
||||
await _setInput()
|
||||
setSetting({ ...settingRef.current })
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
|
||||
const setNoiseSuppression = useMemo(() => {
|
||||
return async (val: boolean) => {
|
||||
if (!props.voiceChangerClient) return
|
||||
settingRef.current.noiseSuppression = val
|
||||
await _setInput()
|
||||
setSetting({ ...settingRef.current })
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
|
||||
const setNoiseSuppression2 = useMemo(() => {
|
||||
return async (val: boolean) => {
|
||||
if (!props.voiceChangerClient) return
|
||||
settingRef.current.noiseSuppression2 = val
|
||||
await _setInput()
|
||||
setSetting({ ...settingRef.current })
|
||||
}
|
||||
@ -271,7 +289,9 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
|
||||
setProtocol,
|
||||
setAudioInput,
|
||||
setBufferSize,
|
||||
setVfForceDisabled,
|
||||
setEchoCancel,
|
||||
setNoiseSuppression,
|
||||
setNoiseSuppression2,
|
||||
setInputChunkNum,
|
||||
setVoiceChangerMode,
|
||||
setDownSamplingMode,
|
||||
|
@ -49,6 +49,8 @@ export type ServerSettingState = {
|
||||
setCrossFadeEndRate: (num: number) => Promise<boolean>;
|
||||
setCrossFadeOverlapRate: (num: number) => Promise<boolean>;
|
||||
setF0Factor: (num: number) => Promise<boolean>;
|
||||
setF0Detector: (val: string) => Promise<boolean>;
|
||||
setRecordIO: (num: number) => Promise<boolean>;
|
||||
reloadServerInfo: () => Promise<void>;
|
||||
setFileUploadSetting: (val: FileUploadSetting) => void
|
||||
loadModel: () => Promise<void>
|
||||
@ -97,6 +99,9 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeEndRate, "" + setting.crossFadeEndRate)
|
||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, "" + setting.crossFadeOverlapRate)
|
||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Factor, "" + setting.f0Factor)
|
||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Detector, "" + setting.f0Detector)
|
||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.recordIO, "" + setting.recordIO)
|
||||
|
||||
|
||||
}, [props.voiceChangerClient])
|
||||
|
||||
@ -123,7 +128,10 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
crossFadeOverlapRate: res.crossFadeOverlapRate,
|
||||
framework: res.framework,
|
||||
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
||||
f0Factor: res.f0Factor
|
||||
f0Factor: res.f0Factor,
|
||||
f0Detector: res.f0Detector,
|
||||
recordIO: res.recordIO
|
||||
|
||||
}
|
||||
_setSetting(newSetting)
|
||||
setItem(INDEXEDDB_KEY_SERVER, newSetting)
|
||||
@ -199,6 +207,17 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
return await _set_and_store(ServerSettingKey.f0Factor, "" + num)
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
|
||||
const setF0Detector = useMemo(() => {
|
||||
return async (val: string) => {
|
||||
return await _set_and_store(ServerSettingKey.f0Detector, "" + val)
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
const setRecordIO = useMemo(() => {
|
||||
return async (num: number) => {
|
||||
return await _set_and_store(ServerSettingKey.recordIO, "" + num)
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
//////////////
|
||||
// Operations
|
||||
/////////////
|
||||
@ -337,7 +356,9 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
crossFadeOverlapRate: res.crossFadeOverlapRate,
|
||||
framework: res.framework,
|
||||
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
||||
f0Factor: res.f0Factor
|
||||
f0Factor: res.f0Factor,
|
||||
f0Detector: res.f0Detector,
|
||||
recordIO: res.recordIO
|
||||
})
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
@ -364,6 +385,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
setCrossFadeEndRate,
|
||||
setCrossFadeOverlapRate,
|
||||
setF0Factor,
|
||||
setF0Detector,
|
||||
setRecordIO,
|
||||
reloadServerInfo,
|
||||
setFileUploadSetting,
|
||||
loadModel,
|
||||
|
@ -16,6 +16,9 @@ NATIVE_CLIENT_FILE_MAC = os.path.join(sys._MEIPASS, "voice-changer-native-client
|
||||
"voice-changer-native-client") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"
|
||||
|
||||
|
||||
TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
|
||||
os.makedirs(TMP_DIR, exist_ok=True)
|
||||
|
||||
# SSL_KEY_DIR = os.path.join(sys._MEIPASS, "keys") if hasattr(sys, "_MEIPASS") else "keys"
|
||||
# MODEL_DIR = os.path.join(sys._MEIPASS, "logs") if hasattr(sys, "_MEIPASS") else "logs"
|
||||
# UPLOAD_DIR = os.path.join(sys._MEIPASS, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
|
||||
|
@ -9,7 +9,7 @@ from restapi.MMVC_Rest_Hello import MMVC_Rest_Hello
|
||||
from restapi.MMVC_Rest_VoiceChanger import MMVC_Rest_VoiceChanger
|
||||
from restapi.MMVC_Rest_Fileuploader import MMVC_Rest_Fileuploader
|
||||
from restapi.MMVC_Rest_Trainer import MMVC_Rest_Trainer
|
||||
from const import frontend_path
|
||||
from const import frontend_path, TMP_DIR
|
||||
|
||||
|
||||
class ValidationErrorLoggingRoute(APIRoute):
|
||||
@ -27,6 +27,7 @@ class ValidationErrorLoggingRoute(APIRoute):
|
||||
|
||||
return custom_route_handler
|
||||
|
||||
|
||||
class MMVC_Rest:
|
||||
|
||||
@classmethod
|
||||
@ -50,6 +51,8 @@ class MMVC_Rest:
|
||||
|
||||
app_fastapi.mount(
|
||||
"/recorder", StaticFiles(directory=f'{frontend_path}', html=True), name="static")
|
||||
app_fastapi.mount(
|
||||
"/tmp", StaticFiles(directory=f'{TMP_DIR}'), name="static")
|
||||
|
||||
restHello = MMVC_Rest_Hello()
|
||||
app_fastapi.include_router(restHello.router)
|
||||
|
@ -4,6 +4,7 @@ import numpy as np
|
||||
import socketio
|
||||
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
||||
|
||||
|
||||
class MMVC_Namespace(socketio.AsyncNamespace):
|
||||
def __init__(self, namespace: str, voiceChangerManager: VoiceChangerManager):
|
||||
super().__init__(namespace)
|
||||
@ -36,4 +37,3 @@ class MMVC_Namespace(socketio.AsyncNamespace):
|
||||
def on_disconnect(self, sid):
|
||||
# print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
|
||||
pass
|
||||
|
||||
|
2
server/tmp_dir/.gitignore
vendored
Normal file
2
server/tmp_dir/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
@ -1,4 +1,4 @@
|
||||
from const import ERROR_NO_ONNX_SESSION
|
||||
from const import ERROR_NO_ONNX_SESSION, TMP_DIR
|
||||
import torch
|
||||
import os
|
||||
import traceback
|
||||
@ -84,15 +84,17 @@ class VocieChangerSettings():
|
||||
minConvertSize: int = 0
|
||||
framework: str = "PyTorch" # PyTorch or ONNX
|
||||
f0Factor: float = 1.0
|
||||
f0Detector: str = "dio" # dio or harvest
|
||||
recordIO: int = 1 # 0:off, 1:on
|
||||
|
||||
pyTorchModelFile: str = ""
|
||||
onnxModelFile: str = ""
|
||||
configFile: str = ""
|
||||
|
||||
# ↓ List only the mutable settings
|
||||
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize"]
|
||||
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO"]
|
||||
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
|
||||
strData = ["framework"]
|
||||
strData = ["framework", "f0Detector"]
|
||||
|
||||
|
||||
class VoiceChanger():
|
||||
@ -113,15 +115,26 @@ class VoiceChanger():
|
||||
self.prev_audio = np.zeros(1)
|
||||
self.mps_enabled = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
||||
|
||||
mock_stream = MockStream(24000)
|
||||
mock_stream.open_outputfile("out.wav")
|
||||
self.out = mock_stream
|
||||
mock_stream_in = MockStream(24000)
|
||||
mock_stream_in.open_outputfile("in.wav")
|
||||
self.stream_in = mock_stream_in
|
||||
self._setupRecordIO()
|
||||
|
||||
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
|
||||
|
||||
def _setupRecordIO(self):
    """Create fresh recorders for the converted output and the raw input.

    For each of ``out.wav`` and ``in.wav`` under ``TMP_DIR``: build a
    ``MockStream(24000)``, delete the file if it already exists, open the
    file on the stream, and store the stream on ``self.stream_out`` /
    ``self.stream_in`` respectively. The output recorder is set up first.
    """
    # IO Recorder Setup
    # NOTE(review): 24000 is presumably the sample rate - confirm against MockStream.
    targets = (("stream_out", "out.wav"), ("stream_in", "in.wav"))
    for attribute, wav_name in targets:
        recorder = MockStream(24000)
        wav_path = os.path.join(TMP_DIR, wav_name)
        # Start from a clean file rather than reusing an earlier recording.
        if os.path.exists(wav_path):
            os.unlink(wav_path)
        recorder.open_outputfile(wav_path)
        setattr(self, attribute, recorder)
|
||||
|
||||
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None):
|
||||
self.settings.configFile = config
|
||||
self.hps = get_hparams_from_file(config)
|
||||
@ -200,6 +213,8 @@ class VoiceChanger():
|
||||
self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
|
||||
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
|
||||
self.unpackedData_length = 0
|
||||
if key == "recordIO" and val == 1:
|
||||
self._setupRecordIO()
|
||||
elif key in self.settings.floatData:
|
||||
setattr(self.settings, key, float(val))
|
||||
elif key in self.settings.strData:
|
||||
@ -256,10 +271,11 @@ class VoiceChanger():
|
||||
|
||||
# TBD: the data bounces back and forth between numpy and pytorch; getting it working has top priority for now.
|
||||
audio_norm_np = audio_norm.squeeze().numpy().astype(np.float64)
|
||||
if self.settings.f0Detector == "dio":
|
||||
_f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
|
||||
f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
|
||||
# print("type:", audio_norm_np.dtype)
|
||||
# f0, t = pw.harvest(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
|
||||
else:
|
||||
f0, t = pw.harvest(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
|
||||
f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
|
||||
f0 = torch.from_numpy(f0.astype(np.float32))
|
||||
|
||||
@ -280,7 +296,7 @@ class VoiceChanger():
|
||||
f0_factor=self.settings.f0Factor
|
||||
)([(spec, sid, f0)])
|
||||
|
||||
return data
|
||||
return data, f0.numpy()
|
||||
|
||||
def _onnx_inference(self, data, inputSize):
|
||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||
@ -401,7 +417,6 @@ class VoiceChanger():
|
||||
|
||||
def on_request(self, unpackedData: any):
|
||||
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
|
||||
self.stream_in.write(unpackedData.astype(np.int16).tobytes())
|
||||
# print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
|
||||
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
|
||||
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
|
||||
@ -412,7 +427,8 @@ class VoiceChanger():
|
||||
# convertSize = 8192
|
||||
|
||||
self._generate_strength(unpackedData)
|
||||
data = self._generate_input(unpackedData, convertSize)
|
||||
# f0 is for debugging
|
||||
data, f0 = self._generate_input(unpackedData, convertSize)
|
||||
|
||||
try:
|
||||
if self.settings.framework == "ONNX":
|
||||
@ -431,6 +447,9 @@ class VoiceChanger():
|
||||
|
||||
result = result.astype(np.int16)
|
||||
# print("on_request result size:",result.shape)
|
||||
if self.settings.recordIO == 1:
|
||||
self.stream_in.write(unpackedData.astype(np.int16).tobytes())
|
||||
self.stream_out.write(result.tobytes())
|
||||
return result
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user