add quality control

wataru 2023-02-15 05:02:51 +09:00
parent 852b4216ca
commit f73178b9b0
16 changed files with 327 additions and 94 deletions

File diff suppressed because one or more lines are too long

View File

@@ -8,6 +8,7 @@ import { useAdvancedSetting } from "./105_advanced_setting";
 import { useSpeakerSetting } from "./103_speaker_setting";
 import { useServerControl } from "./106_server_control";
 import { useClient } from "@dannadori/voice-changer-client-js";
+import { useQualityControl } from "./107_qulity_control";

 export const useMicrophoneOptions = () => {
     const [audioContext, setAudioContext] = useState<AudioContext | null>(null)
@@ -23,6 +24,7 @@ export const useMicrophoneOptions = () => {
     const convertSetting = useConvertSetting({ clientState })
     const advancedSetting = useAdvancedSetting({ clientState })
     const serverControl = useServerControl({ clientState })
+    const qualityControl = useQualityControl({ clientState })

     const clearSetting = async () => {
         await clientState.clearSetting()
@@ -51,6 +53,7 @@ export const useMicrophoneOptions = () => {
                 {serverControl.serverControl}
                 {serverSetting.serverSetting}
                 {deviceSetting.deviceSetting}
+                {qualityControl.qualityControl}
                 {speakerSetting.speakerSetting}
                 {convertSetting.convertSetting}
                 {advancedSetting.advancedSetting}
@@ -61,7 +64,8 @@ export const useMicrophoneOptions = () => {
         deviceSetting.deviceSetting,
         speakerSetting.speakerSetting,
         convertSetting.convertSetting,
-        advancedSetting.advancedSetting])
+        advancedSetting.advancedSetting,
+        qualityControl.qualityControl])

     return {
         voiceChangerSetting,
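
The new section is wired in through the same pattern every settings block here uses: a `use...` hook takes `clientState` and hands back a memoized JSX fragment, which `useMicrophoneOptions` lists both in the rendered tree and in its dependency array. A minimal sketch of that contract (`useExampleSection` and `SectionState` are illustrative names, not part of the repo):

import { useMemo } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js"

// Sketch of the section-hook contract used by useQualityControl and its siblings:
// keep any UI state local, memoize the fragment, return it for composition.
export type SectionState = {
    section: JSX.Element
}

export const useExampleSection = (props: { clientState: ClientState }): SectionState => {
    const section = useMemo(() => {
        return <div className="body-row">example section</div>
    }, [])
    return { section }
}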

View File

@@ -129,14 +129,13 @@ export const useSpeakerSetting = (props: UseSpeakerSettingProps) => {
             <div className="body-row split-3-2-1-4 left-padding-1 guided">
                 <div className="body-item-title left-padding-1">F0 Factor</div>
                 <div className="body-input-container">
-                    <input type="range" className="body-item-input" min="0.1" max="5.0" step="0.1" value={props.clientState.serverSetting.setting.f0Factor} onChange={(e) => {
+                    <input type="range" className="body-item-input-slider" min="0.1" max="5.0" step="0.1" value={props.clientState.serverSetting.setting.f0Factor} onChange={(e) => {
                         props.clientState.serverSetting.setF0Factor(Number(e.target.value))
                     }}></input>
+                    <span className="body-item-input-slider-val">{props.clientState.serverSetting.setting.f0Factor}</span>
                 </div>
-                <div className="body-item-text">
-                    <div>{props.clientState.serverSetting.setting.f0Factor}</div>
-                </div>
-                <div className="body-item-text">recommended:{recommendedF0Factor.toFixed(1)}</div>
+                <div className="body-item-text"></div>
+                <div className="body-item-text">recommend: {recommendedF0Factor.toFixed(1)}</div>
             </div>
         )
     }, [props.clientState.serverSetting.setting.f0Factor, props.clientState.serverSetting.setting.srcId, props.clientState.serverSetting.setting.dstId, props.clientState.clientSetting.setting.correspondences, props.clientState.serverSetting.setF0Factor])

View File

@@ -1,4 +1,4 @@
-import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
+import { BufferSize, DownSamplingMode, F0Detector, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
 import React, { useMemo, useState } from "react"
 import { ClientState } from "@dannadori/voice-changer-client-js";
@@ -162,21 +162,6 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
     }, [props.clientState.serverSetting.setting.crossFadeEndRate, props.clientState.serverSetting.setCrossFadeEndRate])

-    const vfForceDisableRow = useMemo(() => {
-        return (
-            <div className="body-row split-3-3-4 left-padding-1 guided">
-                <div className="body-item-title left-padding-1 ">VF Disabled</div>
-                <div>
-                    <input type="checkbox" checked={props.clientState.clientSetting.setting.forceVfDisable} onChange={(e) => {
-                        props.clientState.clientSetting.setVfForceDisabled(e.target.checked)
-                    }} />
-                </div>
-                <div className="body-button-container">
-                </div>
-            </div>
-        )
-    }, [props.clientState.clientSetting.setting.forceVfDisable, props.clientState.clientSetting.setVfForceDisabled])
-
     const voiceChangeModeRow = useMemo(() => {
         return (
             <div className="body-row split-3-7 left-padding-1 guided">
@@ -217,7 +202,6 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
     }, [props.clientState.clientSetting.setting.downSamplingMode, props.clientState.clientSetting.setDownSamplingMode])

-
     const workletSettingRow = useMemo(() => {
         return (
             <>
@@ -280,15 +264,15 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
                 {crossFadeOffsetRateRow}
                 {crossFadeEndRateRow}
                 <div className="body-row divider"></div>
-                {vfForceDisableRow}
                 {voiceChangeModeRow}
                 <div className="body-row divider"></div>
                 {workletSettingRow}
                 <div className="body-row divider"></div>
                 {downSamplingModeRow}
             </>
         )
-    }, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
+    }, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])

     const advancedSetting = useMemo(() => {

View File

@@ -30,31 +30,13 @@ export const useServerControl = (props: UseServerControlProps) => {
                     <div onClick={onStopClicked} className={stopClassName}>stop</div>
                 </div>
                 <div>
-                    <div className="body-input-container split-4-4-2">
-                        <div>gain(in)</div>
-                        <input type="range" className="body-item-input" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.inputGain} onChange={(e) => {
-                            props.clientState.clientSetting.setInputGain(Number(e.target.value))
-                        }}></input>
-                        <div>{props.clientState.clientSetting.setting.inputGain}</div>
-                    </div>
-                    <div className="body-input-container split-4-4-2">
-                        <div>gain(out)</div>
-                        <input type="range" className="body-item-input" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.outputGain} onChange={(e) => {
-                            props.clientState.clientSetting.setOutputGain(Number(e.target.value))
-                        }}></input>
-                        <div>{props.clientState.clientSetting.setting.outputGain}</div>
-                    </div>
                 </div>
                 <div className="body-input-container">
                 </div>
             </div>
         )
-    }, [isStarted, props.clientState.clientSetting.start, props.clientState.clientSetting.stop,
-        props.clientState.clientSetting.setInputGain, props.clientState.clientSetting.setting.inputGain,
-        props.clientState.clientSetting.setOutputGain, props.clientState.clientSetting.setting.outputGain
-    ])
+    }, [isStarted, props.clientState.clientSetting.start, props.clientState.clientSetting.stop])

     const performanceRow = useMemo(() => {
         return (

View File

@@ -0,0 +1,150 @@
+import { BufferSize, DownSamplingMode, F0Detector, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
+import React, { useMemo, useState } from "react"
+import { ClientState } from "@dannadori/voice-changer-client-js";
+
+export type UseQualityControlProps = {
+    clientState: ClientState
+}
+
+export type QualityControlState = {
+    qualityControl: JSX.Element;
+}
+
+export const useQualityControl = (props: UseQualityControlProps): QualityControlState => {
+    const [showQualityControl, setShowQualityControl] = useState<boolean>(false)
+
+    const noiseControlRow = useMemo(() => {
+        return (
+            <div className="body-row split-3-2-2-2-1 left-padding-1 guided">
+                <div className="body-item-title left-padding-1 ">Noise Suppression</div>
+                <div>
+                    <input type="checkbox" checked={props.clientState.clientSetting.setting.echoCancel} onChange={(e) => {
+                        props.clientState.clientSetting.setEchoCancel(e.target.checked)
+                    }} /> echo cancel
+                </div>
+                <div>
+                    <input type="checkbox" checked={props.clientState.clientSetting.setting.noiseSuppression} onChange={(e) => {
+                        props.clientState.clientSetting.setNoiseSuppression(e.target.checked)
+                    }} /> suppression1
+                </div>
+                <div>
+                    <input type="checkbox" checked={props.clientState.clientSetting.setting.noiseSuppression2} onChange={(e) => {
+                        props.clientState.clientSetting.setNoiseSuppression2(e.target.checked)
+                    }} /> suppression2
+                </div>
+                <div className="body-button-container">
+                </div>
+            </div>
+        )
+    }, [
+        props.clientState.clientSetting.setting.echoCancel, props.clientState.clientSetting.setEchoCancel,
+        props.clientState.clientSetting.setting.noiseSuppression, props.clientState.clientSetting.setNoiseSuppression,
+        props.clientState.clientSetting.setting.noiseSuppression2, props.clientState.clientSetting.setNoiseSuppression2,
+    ])
+
+    const gainControlRow = useMemo(() => {
+        return (
+            <div className="body-row split-3-2-2-3 left-padding-1 guided">
+                <div className="body-item-title left-padding-1 ">Gain Control</div>
+                <div>
+                    <span className="body-item-input-slider-label">in</span>
+                    <input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.inputGain} onChange={(e) => {
+                        props.clientState.clientSetting.setInputGain(Number(e.target.value))
+                    }}></input>
+                    <span className="body-item-input-slider-val">{props.clientState.clientSetting.setting.inputGain}</span>
+                </div>
+                <div>
+                    <span className="body-item-input-slider-label">out</span>
+                    <input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.outputGain} onChange={(e) => {
+                        props.clientState.clientSetting.setOutputGain(Number(e.target.value))
+                    }}></input>
+                    <span className="body-item-input-slider-val">{props.clientState.clientSetting.setting.outputGain}</span>
+                </div>
+                <div className="body-button-container">
+                </div>
+            </div>
+        )
+    }, [
+        props.clientState.clientSetting.setting.inputGain, props.clientState.clientSetting.setInputGain,
+        props.clientState.clientSetting.setting.outputGain, props.clientState.clientSetting.setOutputGain,
+    ])
+
+    const f0DetectorRow = useMemo(() => {
+        const desc = { "harvest": "High Quality", "dio": "Light Weight" }
+        return (
+            <div className="body-row split-3-7 left-padding-1 guided">
+                <div className="body-item-title left-padding-1 ">F0 Detector</div>
+                <div className="body-select-container">
+                    <select className="body-select" value={props.clientState.serverSetting.setting.f0Detector} onChange={(e) => {
+                        props.clientState.serverSetting.setF0Detector(e.target.value as F0Detector)
+                    }}>
+                        {
+                            Object.values(F0Detector).map(x => {
+                                //@ts-ignore
+                                return <option key={x} value={x}>{x}({desc[x]})</option>
+                            })
+                        }
+                    </select>
+                </div>
+            </div>
+        )
+    }, [props.clientState.serverSetting.setting.f0Detector, props.clientState.serverSetting.setF0Detector])
+
+    const recordIORow = useMemo(() => {
+        return (
+            <div className="body-row split-3-7 left-padding-1 guided">
+                <div className="body-item-title left-padding-1 ">recordIO</div>
+                <div className="body-select-container">
+                    <select className="body-select" value={props.clientState.serverSetting.setting.recordIO} onChange={(e) => {
+                        props.clientState.serverSetting.setRecordIO(Number(e.target.value))
+                    }}>
+                        {
+                            [0, 1].map(x => {
+                                return <option key={x} value={x}>{x}</option>
+                            })
+                        }
+                    </select>
+                </div>
+            </div>
+        )
+    }, [props.clientState.serverSetting.setting.recordIO, props.clientState.serverSetting.setRecordIO])
+
+    const QualityControlContent = useMemo(() => {
+        if (!showQualityControl) return <></>
+        return (
+            <>
+                {noiseControlRow}
+                {gainControlRow}
+                {f0DetectorRow}
+                {recordIORow}
+            </>
+        )
+    }, [showQualityControl, gainControlRow, noiseControlRow, f0DetectorRow, recordIORow])
+
+    const qualityControl = useMemo(() => {
+        return (
+            <>
+                <div className="body-row split-3-7 left-padding-1">
+                    <div className="body-sub-section-title">Quality Control</div>
+                    <div>
+                        <input type="checkbox" checked={showQualityControl} onChange={(e) => {
+                            setShowQualityControl(e.target.checked)
+                        }} /> show
+                    </div>
+                </div>
+                {QualityControlContent}
+            </>
+        )
+    }, [showQualityControl, QualityControlContent])

+    return {
+        qualityControl,
+    }
+}
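
One detail worth noting in the new file: the //@ts-ignore over desc[x] exists because desc is inferred as a plain object literal while x is an F0Detector value. A hedged sketch of typing the lookup so the ignore becomes unnecessary (assuming, as in the hunk above, that the F0Detector values are the table's keys):

// Sketch: declaring the table as Record<F0Detector, string> lets
// desc[x] type-check for every F0Detector value without //@ts-ignore.
import { F0Detector } from "@dannadori/voice-changer-client-js"

const desc: Record<F0Detector, string> = { "harvest": "High Quality", "dio": "Light Weight" }
const labels = Object.values(F0Detector).map(x => `${x}(${desc[x]})`)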

View File

@@ -388,7 +388,16 @@ body {
     display: flex;
 }

 .body-item-input {
-    width: 90%;
+    width: 60%;
+}
+
+.body-item-input-slider {
+    width: 60%;
+}
+
+.body-item-input-slider-label {
+    margin-right: 1rem;
+}
+
+.body-item-input-slider-val {
+    margin-left: 1rem;
 }

 .body-button-container {

View File

@@ -115,8 +115,10 @@ export class VoiceChangerClient {
     }

     // forceVfDisable is for the condition that vf is enabled in constructor.
-    setup = async (input: string | MediaStream, bufferSize: BufferSize, forceVfDisable: boolean = false) => {
+    // noiseSuppression2 => VoiceFocus
+    setup = async (input: string | MediaStream, bufferSize: BufferSize, echoCancel: boolean = true, noiseSuppression: boolean = true, noiseSuppression2: boolean = false) => {
         const lockNum = await this.lock()
+        console.log(`Input Setup=> echo: ${echoCancel}, noise1: ${noiseSuppression}, noise2: ${noiseSuppression2}`)

         // condition check
         if (!this.vcNode) {
             console.warn("vc node is not initialized.")
@@ -136,10 +138,16 @@
                     channelCount: 1,
                     sampleRate: 48000,
                     sampleSize: 16,
-                    // echoCancellation: false,
-                    // noiseSuppression: false
+                    autoGainControl: false,
+                    echoCancellation: echoCancel,
+                    noiseSuppression: noiseSuppression
                 }
             })
+            // this.currentMediaStream.getAudioTracks().forEach((x) => {
+            //     console.log("MIC Setting(cap)", x.getCapabilities())
+            //     console.log("MIC Setting(const)", x.getConstraints())
+            //     console.log("MIC Setting(setting)", x.getSettings())
+            // })
         } else {
             this.currentMediaStream = input
         }
@@ -160,14 +168,13 @@
         this.inputGainNode = this.ctx.createGain()
         this.inputGainNode.gain.value = this.inputGain
         this.currentMediaStreamAudioSourceNode.connect(this.inputGainNode)
-        if (this.currentDevice && forceVfDisable == false) {
+        if (this.currentDevice && noiseSuppression2) {
             this.currentDevice.chooseNewInnerDevice(this.currentMediaStream)
             const voiceFocusNode = await this.currentDevice.createAudioNode(this.ctx); // vf node
             this.inputGainNode.connect(voiceFocusNode.start) // input node -> vf node
             voiceFocusNode.end.connect(this.outputNodeFromVF!)
             this.micStream.setStream(this.outputNodeFromVF!.stream) // vf node -> mic stream
         } else {
-            console.log("VF disabled")
             const inputDestinationNodeForMicStream = this.ctx.createMediaStreamDestination()
             this.inputGainNode.connect(inputDestinationNodeForMicStream)
             this.micStream.setStream(inputDestinationNodeForMicStream.stream) // input device -> mic stream
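
The setup() change above moves echo cancellation and noise suppression into the getUserMedia constraints, which are fixed when the track is created; that is why the client re-runs setup() whenever one of the checkboxes flips, and why VoiceFocus (noiseSuppression2) is wired separately as an audio graph node after the gain node. A standalone sketch of the same acquisition, with the flag values hard-coded for illustration:

// Sketch: acquire the mic with the constraints setup() now builds.
// The browser applies echoCancellation/noiseSuppression at capture time;
// VoiceFocus (noiseSuppression2) is layered on later as an AudioNode.
const acquireMic = async (): Promise<MediaStream> => {
    const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
            channelCount: 1,
            sampleRate: 48000,
            sampleSize: 16,
            autoGainControl: false,
            echoCancellation: true,   // "echo cancel" checkbox
            noiseSuppression: true,   // "suppression1" checkbox
        },
    })
    // Same idea as the commented-out debug block: see what the browser actually applied.
    stream.getAudioTracks().forEach((t) => console.log("MIC setting:", t.getSettings()))
    return stream
}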

View File

@@ -23,6 +23,8 @@ export type VoiceChangerServerSetting = {
     onnxExecutionProvider: OnnxExecutionProvider,
     f0Factor: number
+    f0Detector: string // dio or harvest
+    recordIO: number // 0:off, 1:on
 }

 export type VoiceChangerClientSetting = {
@@ -34,7 +36,9 @@ export type VoiceChangerClientSetting = {
     inputChunkNum: number, // n of (256 x n) for send buffer
     speakers: Speaker[],
     correspondences: Correspondence[],
-    forceVfDisable: boolean,
+    echoCancel: boolean,
+    noiseSuppression: boolean,
+    noiseSuppression2: boolean,
     voiceChangerMode: VoiceChangerMode,
     downSamplingMode: DownSamplingMode,
@@ -75,6 +79,8 @@ export type ServerInfo = {
     framework: Framework,
     onnxExecutionProvider: string[]
     f0Factor: number
+    f0Detector: string
+    recordIO: number
 }
@@ -130,6 +136,12 @@ export const Framework = {
 }
 export type Framework = typeof Framework[keyof typeof Framework]

+export const F0Detector = {
+    "dio": "dio",
+    "harvest": "harvest",
+}
+export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
+
 export const ServerSettingKey = {
     "srcId": "srcId",
     "dstId": "dstId",
@@ -141,7 +153,9 @@ export const ServerSettingKey = {
     "crossFadeOverlapRate": "crossFadeOverlapRate",
     "framework": "framework",
     "onnxExecutionProvider": "onnxExecutionProvider",
-    "f0Factor": "f0Factor"
+    "f0Factor": "f0Factor",
+    "f0Detector": "f0Detector",
+    "recordIO": "recordIO"
 } as const
 export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
@@ -158,8 +172,9 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
     crossFadeOverlapRate: 0.5,
     framework: "PyTorch",
     f0Factor: 1.0,
-    onnxExecutionProvider: "CPUExecutionProvider"
+    onnxExecutionProvider: "CPUExecutionProvider",
+    f0Detector: "dio",
+    recordIO: 0
 }

 export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
@@ -192,7 +207,9 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
         }
     ],
     correspondences: [],
-    forceVfDisable: false,
+    echoCancel: true,
+    noiseSuppression: true,
+    noiseSuppression2: false,
     voiceChangerMode: "realtime",
     downSamplingMode: "average",
     inputGain: 1.0,
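
F0Detector reuses the const-object idiom that Framework and ServerSettingKey already follow: one object supplies the runtime values the UI iterates with Object.values(...), and a type of the same name narrows what a variable may hold. A sketch of the idiom, shown here with `as const` added so the derived type is the literal union (the hunk above may omit it, in which case the type widens to string and the UI's `e.target.value as F0Detector` cast does the narrowing):

// Sketch of the const-object "enum" idiom used throughout const.ts.
const Detector = {
    "dio": "dio",
    "harvest": "harvest",
} as const
type Detector = typeof Detector[keyof typeof Detector] // "dio" | "harvest"

// Runtime iteration for UI options, compile-time narrowing for values:
const options: Detector[] = Object.values(Detector)
const chosen: Detector = "harvest" // e.g. "pwsm" would be a compile error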

View File

@@ -17,7 +17,9 @@ export type ClientSettingState = {
     setProtocol: (proto: Protocol) => void;
     setAudioInput: (audioInput: string | MediaStream | null) => Promise<void>
     setBufferSize: (bufferSize: BufferSize) => Promise<void>
-    setVfForceDisabled: (vfForceDisabled: boolean) => Promise<void>
+    setEchoCancel: (voiceFocus: boolean) => Promise<void>
+    setNoiseSuppression: (voiceFocus: boolean) => Promise<void>
+    setNoiseSuppression2: (voiceFocus: boolean) => Promise<void>
     setInputChunkNum: (num: number) => void;
     setVoiceChangerMode: (mode: VoiceChangerMode) => void
     setDownSamplingMode: (mode: DownSamplingMode) => void
@@ -110,15 +112,13 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
     const _setInput = async () => {
         if (!props.voiceChangerClient) return

-        // console.log("[useClient] setup!(0)", settingRef.current.audioInput)
         if (!settingRef.current.audioInput || settingRef.current.audioInput == "none") {
             // console.log("[useClient] setup!(1)", settingRef.current.audioInput)
             const ms = createDummyMediaStream(props.audioContext!)
-            await props.voiceChangerClient.setup(ms, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
+            await props.voiceChangerClient.setup(ms, settingRef.current.bufferSize, settingRef.current.echoCancel, settingRef.current.noiseSuppression, settingRef.current.noiseSuppression2)
         } else {
             // console.log("[useClient] setup!(2)", settingRef.current.audioInput)
-            await props.voiceChangerClient.setup(settingRef.current.audioInput, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
+            await props.voiceChangerClient.setup(settingRef.current.audioInput, settingRef.current.bufferSize, settingRef.current.echoCancel, settingRef.current.noiseSuppression, settingRef.current.noiseSuppression2)
         }
     }
@@ -140,10 +140,28 @@
         }
     }, [props.voiceChangerClient])

-    const setVfForceDisabled = useMemo(() => {
-        return async (vfForceDisabled: boolean) => {
+    const setEchoCancel = useMemo(() => {
+        return async (val: boolean) => {
             if (!props.voiceChangerClient) return
-            settingRef.current.forceVfDisable = vfForceDisabled
+            settingRef.current.echoCancel = val
+            await _setInput()
+            setSetting({ ...settingRef.current })
+        }
+    }, [props.voiceChangerClient])
+
+    const setNoiseSuppression = useMemo(() => {
+        return async (val: boolean) => {
+            if (!props.voiceChangerClient) return
+            settingRef.current.noiseSuppression = val
+            await _setInput()
+            setSetting({ ...settingRef.current })
+        }
+    }, [props.voiceChangerClient])
+
+    const setNoiseSuppression2 = useMemo(() => {
+        return async (val: boolean) => {
+            if (!props.voiceChangerClient) return
+            settingRef.current.noiseSuppression2 = val
             await _setInput()
             setSetting({ ...settingRef.current })
         }
@@ -271,7 +289,9 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
         setProtocol,
         setAudioInput,
         setBufferSize,
-        setVfForceDisabled,
+        setEchoCancel,
+        setNoiseSuppression,
+        setNoiseSuppression2,
         setInputChunkNum,
         setVoiceChangerMode,
         setDownSamplingMode,
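
The three new setters differ only in which field they mutate before re-running _setInput(); each writes through settingRef so the re-acquired stream sees the new flag immediately, then publishes a fresh snapshot via setSetting. A hedged sketch of a factory capturing that shared shape (illustrative only; not what the commit does):

// Illustrative factory for the repeated setter shape above. Assumes the same
// settingRef / _setInput / setSetting / props in scope as useClientSetting.
const makeFlagSetter = (field: "echoCancel" | "noiseSuppression" | "noiseSuppression2") => {
    return async (val: boolean) => {
        if (!props.voiceChangerClient) return
        settingRef.current[field] = val       // mutate the ref read by _setInput()
        await _setInput()                     // re-run getUserMedia with the new constraints
        setSetting({ ...settingRef.current }) // publish a new snapshot to React
    }
}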

View File

@@ -49,6 +49,8 @@ export type ServerSettingState = {
     setCrossFadeEndRate: (num: number) => Promise<boolean>;
     setCrossFadeOverlapRate: (num: number) => Promise<boolean>;
     setF0Factor: (num: number) => Promise<boolean>;
+    setF0Detector: (val: string) => Promise<boolean>;
+    setRecordIO: (num: number) => Promise<boolean>;
     reloadServerInfo: () => Promise<void>;
     setFileUploadSetting: (val: FileUploadSetting) => void
     loadModel: () => Promise<void>
@@ -97,6 +99,9 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
         props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeEndRate, "" + setting.crossFadeEndRate)
         props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, "" + setting.crossFadeOverlapRate)
         props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Factor, "" + setting.f0Factor)
+        props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Detector, "" + setting.f0Detector)
+        props.voiceChangerClient.updateServerSettings(ServerSettingKey.recordIO, "" + setting.recordIO)
     }, [props.voiceChangerClient])
@@ -123,7 +128,10 @@
                 crossFadeOverlapRate: res.crossFadeOverlapRate,
                 framework: res.framework,
                 onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
-                f0Factor: res.f0Factor
+                f0Factor: res.f0Factor,
+                f0Detector: res.f0Detector,
+                recordIO: res.recordIO
             }
             _setSetting(newSetting)
             setItem(INDEXEDDB_KEY_SERVER, newSetting)
@@ -199,6 +207,17 @@
             return await _set_and_store(ServerSettingKey.f0Factor, "" + num)
         }
     }, [props.voiceChangerClient])
+    const setF0Detector = useMemo(() => {
+        return async (val: string) => {
+            return await _set_and_store(ServerSettingKey.f0Detector, "" + val)
+        }
+    }, [props.voiceChangerClient])
+
+    const setRecordIO = useMemo(() => {
+        return async (num: number) => {
+            return await _set_and_store(ServerSettingKey.recordIO, "" + num)
+        }
+    }, [props.voiceChangerClient])

     //////////////
     // Operations
     /////////////
@@ -337,7 +356,9 @@
                 crossFadeOverlapRate: res.crossFadeOverlapRate,
                 framework: res.framework,
                 onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
-                f0Factor: res.f0Factor
+                f0Factor: res.f0Factor,
+                f0Detector: res.f0Detector,
+                recordIO: res.recordIO
             })
         }
     }, [props.voiceChangerClient])
@@ -364,6 +385,8 @@
         setCrossFadeEndRate,
         setCrossFadeOverlapRate,
         setF0Factor,
+        setF0Detector,
+        setRecordIO,
         reloadServerInfo,
         setFileUploadSetting,
         loadModel,
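
Like every server-side setting in this hook, the two new setters serialize their value with "" + val before handing it to _set_and_store, so both travel to the server as strings; the Python side re-types them according to whether the key appears in intData, floatData, or strData. Usage, as the quality-control rows above call it (the receiver name is illustrative):

// f0Detector sits in strData on the server, recordIO in intData, so the
// string payloads are parsed back to str and int respectively.
await serverSetting.setF0Detector("harvest")
await serverSetting.setRecordIO(1)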

View File

@@ -16,6 +16,9 @@ NATIVE_CLIENT_FILE_MAC = os.path.join(sys._MEIPASS, "voice-changer-native-client
                                       "voice-changer-native-client") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"

+TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
+os.makedirs(TMP_DIR, exist_ok=True)
+
 # SSL_KEY_DIR = os.path.join(sys._MEIPASS, "keys") if hasattr(sys, "_MEIPASS") else "keys"
 # MODEL_DIR = os.path.join(sys._MEIPASS, "logs") if hasattr(sys, "_MEIPASS") else "logs"
 # UPLOAD_DIR = os.path.join(sys._MEIPASS, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"

View File

@@ -9,7 +9,7 @@ from restapi.MMVC_Rest_Hello import MMVC_Rest_Hello
 from restapi.MMVC_Rest_VoiceChanger import MMVC_Rest_VoiceChanger
 from restapi.MMVC_Rest_Fileuploader import MMVC_Rest_Fileuploader
 from restapi.MMVC_Rest_Trainer import MMVC_Rest_Trainer
-from const import frontend_path
+from const import frontend_path, TMP_DIR


 class ValidationErrorLoggingRoute(APIRoute):
@@ -27,6 +27,7 @@ class ValidationErrorLoggingRoute(APIRoute):
         return custom_route_handler

+
 class MMVC_Rest:
     @classmethod
@@ -50,6 +51,8 @@
         app_fastapi.mount(
             "/recorder", StaticFiles(directory=f'{frontend_path}', html=True), name="static")
+        app_fastapi.mount(
+            "/tmp", StaticFiles(directory=f'{TMP_DIR}'), name="static")

         restHello = MMVC_Rest_Hello()
         app_fastapi.include_router(restHello.router)
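
Mounting TMP_DIR at /tmp is what makes the recordIO output reachable: when recording is on, VoiceChanger.py writes in.wav and out.wav into TMP_DIR, so a browser can fetch them through this static mount. A hedged sketch of retrieving one for inspection (the path follows from the mount plus the filename; the base URL is whatever origin the server runs on):

// Sketch: download the recorded input written by the server when recordIO == 1.
const fetchRecordedInput = async (baseUrl: string): Promise<string> => {
    const res = await fetch(`${baseUrl}/tmp/in.wav`)
    const blob = await res.blob()
    return URL.createObjectURL(blob) // e.g. usable as an <audio> element's src
}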

View File

@@ -4,6 +4,7 @@ import numpy as np
 import socketio
 from voice_changer.VoiceChangerManager import VoiceChangerManager

+
 class MMVC_Namespace(socketio.AsyncNamespace):
     def __init__(self, namespace: str, voiceChangerManager: VoiceChangerManager):
         super().__init__(namespace)
@@ -36,4 +37,3 @@ class MMVC_Namespace(socketio.AsyncNamespace):
     def on_disconnect(self, sid):
         # print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
         pass
-

server/tmp_dir/.gitignore (vendored, new file, +2)
View File

@@ -0,0 +1,2 @@
+*
+!.gitignore

View File

@@ -1,4 +1,4 @@
-from const import ERROR_NO_ONNX_SESSION
+from const import ERROR_NO_ONNX_SESSION, TMP_DIR
 import torch
 import os
 import traceback
@@ -84,15 +84,17 @@ class VocieChangerSettings():
     minConvertSize: int = 0
     framework: str = "PyTorch"  # PyTorch or ONNX
     f0Factor: float = 1.0
+    f0Detector: str = "dio"  # dio or harvest
+    recordIO: int = 1  # 0:off, 1:on
     pyTorchModelFile: str = ""
     onnxModelFile: str = ""
     configFile: str = ""
     # Only the mutable items are enumerated below
-    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize"]
+    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO"]
     floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
-    strData = ["framework"]
+    strData = ["framework", "f0Detector"]

 class VoiceChanger():
@@ -113,15 +115,26 @@
         self.prev_audio = np.zeros(1)
         self.mps_enabled = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()

-        mock_stream = MockStream(24000)
-        mock_stream.open_outputfile("out.wav")
-        self.out = mock_stream
-        mock_stream_in = MockStream(24000)
-        mock_stream_in.open_outputfile("in.wav")
-        self.stream_in = mock_stream_in
+        self._setupRecordIO()

         print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")

+    def _setupRecordIO(self):
+        # IO Recorder Setup
+        mock_stream_out = MockStream(24000)
+        stream_output_file = os.path.join(TMP_DIR, "out.wav")
+        if os.path.exists(stream_output_file):
+            os.unlink(stream_output_file)
+        mock_stream_out.open_outputfile(stream_output_file)
+        self.stream_out = mock_stream_out
+
+        mock_stream_in = MockStream(24000)
+        stream_input_file = os.path.join(TMP_DIR, "in.wav")
+        if os.path.exists(stream_input_file):
+            os.unlink(stream_input_file)
+        mock_stream_in.open_outputfile(stream_input_file)
+        self.stream_in = mock_stream_in
+
     def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None):
         self.settings.configFile = config
         self.hps = get_hparams_from_file(config)
@@ -200,6 +213,8 @@
                     self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
             if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
                 self.unpackedData_length = 0
+            if key == "recordIO" and val == 1:
+                self._setupRecordIO()
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
         elif key in self.settings.strData:
@@ -256,10 +271,11 @@
         # TBD: the data bounces back and forth between numpy and pytorch here; getting it working takes priority for now.
         audio_norm_np = audio_norm.squeeze().numpy().astype(np.float64)
-        _f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
-        f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
-        # print("type:", audio_norm_np.dtype)
-        # f0, t = pw.harvest(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
+        if self.settings.f0Detector == "dio":
+            _f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
+            f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
+        else:
+            f0, t = pw.harvest(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
         f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
         f0 = torch.from_numpy(f0.astype(np.float32))
@@ -280,7 +296,7 @@
                 f0_factor=self.settings.f0Factor
             )([(spec, sid, f0)])

-        return data
+        return data, f0.numpy()

     def _onnx_inference(self, data, inputSize):
         if hasattr(self, "onnx_session") == False or self.onnx_session == None:
@@ -401,7 +417,6 @@
     def on_request(self, unpackedData: any):
         convertSize = self.settings.convertChunkNum * 128  # 128sample/1chunk
-        self.stream_in.write(unpackedData.astype(np.int16).tobytes())
         # print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
         if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
             convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
@@ -412,7 +427,8 @@
         # convertSize = 8192

         self._generate_strength(unpackedData)
-        data = self._generate_input(unpackedData, convertSize)
+        # f0 is for debugging
+        data, f0 = self._generate_input(unpackedData, convertSize)

         try:
             if self.settings.framework == "ONNX":
@@ -431,6 +447,9 @@
         result = result.astype(np.int16)
         # print("on_request result size:", result.shape)
+        if self.settings.recordIO == 1:
+            self.stream_in.write(unpackedData.astype(np.int16).tobytes())
+            self.stream_out.write(result.tobytes())

         return result