mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 13:35:12 +03:00
add quality control
This commit is contained in:
parent
852b4216ca
commit
f73178b9b0
23
client/demo/dist/index.js
vendored
23
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -8,6 +8,7 @@ import { useAdvancedSetting } from "./105_advanced_setting";
|
|||||||
import { useSpeakerSetting } from "./103_speaker_setting";
|
import { useSpeakerSetting } from "./103_speaker_setting";
|
||||||
import { useServerControl } from "./106_server_control";
|
import { useServerControl } from "./106_server_control";
|
||||||
import { useClient } from "@dannadori/voice-changer-client-js";
|
import { useClient } from "@dannadori/voice-changer-client-js";
|
||||||
|
import { useQualityControl } from "./107_qulity_control";
|
||||||
|
|
||||||
export const useMicrophoneOptions = () => {
|
export const useMicrophoneOptions = () => {
|
||||||
const [audioContext, setAudioContext] = useState<AudioContext | null>(null)
|
const [audioContext, setAudioContext] = useState<AudioContext | null>(null)
|
||||||
@ -23,6 +24,7 @@ export const useMicrophoneOptions = () => {
|
|||||||
const convertSetting = useConvertSetting({ clientState })
|
const convertSetting = useConvertSetting({ clientState })
|
||||||
const advancedSetting = useAdvancedSetting({ clientState })
|
const advancedSetting = useAdvancedSetting({ clientState })
|
||||||
const serverControl = useServerControl({ clientState })
|
const serverControl = useServerControl({ clientState })
|
||||||
|
const qualityControl = useQualityControl({ clientState })
|
||||||
|
|
||||||
const clearSetting = async () => {
|
const clearSetting = async () => {
|
||||||
await clientState.clearSetting()
|
await clientState.clearSetting()
|
||||||
@ -51,6 +53,7 @@ export const useMicrophoneOptions = () => {
|
|||||||
{serverControl.serverControl}
|
{serverControl.serverControl}
|
||||||
{serverSetting.serverSetting}
|
{serverSetting.serverSetting}
|
||||||
{deviceSetting.deviceSetting}
|
{deviceSetting.deviceSetting}
|
||||||
|
{qualityControl.qualityControl}
|
||||||
{speakerSetting.speakerSetting}
|
{speakerSetting.speakerSetting}
|
||||||
{convertSetting.convertSetting}
|
{convertSetting.convertSetting}
|
||||||
{advancedSetting.advancedSetting}
|
{advancedSetting.advancedSetting}
|
||||||
@ -61,7 +64,8 @@ export const useMicrophoneOptions = () => {
|
|||||||
deviceSetting.deviceSetting,
|
deviceSetting.deviceSetting,
|
||||||
speakerSetting.speakerSetting,
|
speakerSetting.speakerSetting,
|
||||||
convertSetting.convertSetting,
|
convertSetting.convertSetting,
|
||||||
advancedSetting.advancedSetting])
|
advancedSetting.advancedSetting,
|
||||||
|
qualityControl.qualityControl])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
voiceChangerSetting,
|
voiceChangerSetting,
|
||||||
|
@ -129,14 +129,13 @@ export const useSpeakerSetting = (props: UseSpeakerSettingProps) => {
|
|||||||
<div className="body-row split-3-2-1-4 left-padding-1 guided">
|
<div className="body-row split-3-2-1-4 left-padding-1 guided">
|
||||||
<div className="body-item-title left-padding-1">F0 Factor</div>
|
<div className="body-item-title left-padding-1">F0 Factor</div>
|
||||||
<div className="body-input-container">
|
<div className="body-input-container">
|
||||||
<input type="range" className="body-item-input" min="0.1" max="5.0" step="0.1" value={props.clientState.serverSetting.setting.f0Factor} onChange={(e) => {
|
<input type="range" className="body-item-input-slider" min="0.1" max="5.0" step="0.1" value={props.clientState.serverSetting.setting.f0Factor} onChange={(e) => {
|
||||||
props.clientState.serverSetting.setF0Factor(Number(e.target.value))
|
props.clientState.serverSetting.setF0Factor(Number(e.target.value))
|
||||||
}}></input>
|
}}></input>
|
||||||
|
<span className="body-item-input-slider-val">{props.clientState.serverSetting.setting.f0Factor}</span>
|
||||||
</div>
|
</div>
|
||||||
<div className="body-item-text">
|
<div className="body-item-text"></div>
|
||||||
<div>{props.clientState.serverSetting.setting.f0Factor}</div>
|
<div className="body-item-text">recommend: {recommendedF0Factor.toFixed(1)}</div>
|
||||||
</div>
|
|
||||||
<div className="body-item-text">recommended:{recommendedF0Factor.toFixed(1)}</div>
|
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}, [props.clientState.serverSetting.setting.f0Factor, props.clientState.serverSetting.setting.srcId, props.clientState.serverSetting.setting.dstId, props.clientState.clientSetting.setting.correspondences, props.clientState.serverSetting.setF0Factor])
|
}, [props.clientState.serverSetting.setting.f0Factor, props.clientState.serverSetting.setting.srcId, props.clientState.serverSetting.setting.dstId, props.clientState.clientSetting.setting.correspondences, props.clientState.serverSetting.setF0Factor])
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
import { BufferSize, DownSamplingMode, F0Detector, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
||||||
import React, { useMemo, useState } from "react"
|
import React, { useMemo, useState } from "react"
|
||||||
import { ClientState } from "@dannadori/voice-changer-client-js";
|
import { ClientState } from "@dannadori/voice-changer-client-js";
|
||||||
|
|
||||||
@ -162,21 +162,6 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
|
|||||||
}, [props.clientState.serverSetting.setting.crossFadeEndRate, props.clientState.serverSetting.setCrossFadeEndRate])
|
}, [props.clientState.serverSetting.setting.crossFadeEndRate, props.clientState.serverSetting.setCrossFadeEndRate])
|
||||||
|
|
||||||
|
|
||||||
const vfForceDisableRow = useMemo(() => {
|
|
||||||
return (
|
|
||||||
<div className="body-row split-3-3-4 left-padding-1 guided">
|
|
||||||
<div className="body-item-title left-padding-1 ">VF Disabled</div>
|
|
||||||
<div>
|
|
||||||
<input type="checkbox" checked={props.clientState.clientSetting.setting.forceVfDisable} onChange={(e) => {
|
|
||||||
props.clientState.clientSetting.setVfForceDisabled(e.target.checked)
|
|
||||||
}} />
|
|
||||||
</div>
|
|
||||||
<div className="body-button-container">
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}, [props.clientState.clientSetting.setting.forceVfDisable, props.clientState.clientSetting.setVfForceDisabled])
|
|
||||||
|
|
||||||
const voiceChangeModeRow = useMemo(() => {
|
const voiceChangeModeRow = useMemo(() => {
|
||||||
return (
|
return (
|
||||||
<div className="body-row split-3-7 left-padding-1 guided">
|
<div className="body-row split-3-7 left-padding-1 guided">
|
||||||
@ -217,7 +202,6 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
|
|||||||
}, [props.clientState.clientSetting.setting.downSamplingMode, props.clientState.clientSetting.setDownSamplingMode])
|
}, [props.clientState.clientSetting.setting.downSamplingMode, props.clientState.clientSetting.setDownSamplingMode])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const workletSettingRow = useMemo(() => {
|
const workletSettingRow = useMemo(() => {
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
@ -280,15 +264,15 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
|
|||||||
{crossFadeOffsetRateRow}
|
{crossFadeOffsetRateRow}
|
||||||
{crossFadeEndRateRow}
|
{crossFadeEndRateRow}
|
||||||
<div className="body-row divider"></div>
|
<div className="body-row divider"></div>
|
||||||
{vfForceDisableRow}
|
|
||||||
{voiceChangeModeRow}
|
{voiceChangeModeRow}
|
||||||
<div className="body-row divider"></div>
|
<div className="body-row divider"></div>
|
||||||
{workletSettingRow}
|
{workletSettingRow}
|
||||||
<div className="body-row divider"></div>
|
<div className="body-row divider"></div>
|
||||||
{downSamplingModeRow}
|
{downSamplingModeRow}
|
||||||
|
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
}, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
|
}, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
|
||||||
|
|
||||||
|
|
||||||
const advancedSetting = useMemo(() => {
|
const advancedSetting = useMemo(() => {
|
||||||
|
@ -30,31 +30,13 @@ export const useServerControl = (props: UseServerControlProps) => {
|
|||||||
<div onClick={onStopClicked} className={stopClassName}>stop</div>
|
<div onClick={onStopClicked} className={stopClassName}>stop</div>
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<div className="body-input-container split-4-4-2">
|
|
||||||
<div>gain(in)</div>
|
|
||||||
<input type="range" className="body-item-input" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.inputGain} onChange={(e) => {
|
|
||||||
props.clientState.clientSetting.setInputGain(Number(e.target.value))
|
|
||||||
}}></input>
|
|
||||||
<div>{props.clientState.clientSetting.setting.inputGain}</div>
|
|
||||||
</div>
|
</div>
|
||||||
<div className="body-input-container split-4-4-2">
|
|
||||||
<div>gain(out)</div>
|
|
||||||
<input type="range" className="body-item-input" min="0.0" max="1.0" step="0.1" value={props.clientState.clientSetting.setting.outputGain} onChange={(e) => {
|
|
||||||
props.clientState.clientSetting.setOutputGain(Number(e.target.value))
|
|
||||||
}}></input>
|
|
||||||
<div>{props.clientState.clientSetting.setting.outputGain}</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="body-input-container">
|
<div className="body-input-container">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
)
|
)
|
||||||
}, [isStarted, props.clientState.clientSetting.start, props.clientState.clientSetting.stop,
|
}, [isStarted, props.clientState.clientSetting.start, props.clientState.clientSetting.stop])
|
||||||
props.clientState.clientSetting.setInputGain, props.clientState.clientSetting.setting.inputGain,
|
|
||||||
props.clientState.clientSetting.setOutputGain, props.clientState.clientSetting.setting.outputGain
|
|
||||||
])
|
|
||||||
|
|
||||||
const performanceRow = useMemo(() => {
|
const performanceRow = useMemo(() => {
|
||||||
return (
|
return (
|
||||||
|
150
client/demo/src/107_qulity_control.tsx
Normal file
150
client/demo/src/107_qulity_control.tsx
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
import { BufferSize, DownSamplingMode, F0Detector, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
||||||
|
import React, { useMemo, useState } from "react"
|
||||||
|
import { ClientState } from "@dannadori/voice-changer-client-js";
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Arguments accepted by the `useQualityControl` hook.
 *
 * `clientState` is the shared client state object; the hook reads values and
 * setters from its `clientSetting` and `serverSetting` members.
 */
export type UseQualityControlProps = {
    clientState: ClientState;
}
|
||||||
|
|
||||||
|
/**
 * Value returned by the `useQualityControl` hook.
 *
 * `qualityControl` is the ready-to-render JSX for the "Quality Control"
 * section (header row plus, when expanded, its setting rows).
 */
export type QualityControlState = {
    qualityControl: JSX.Element
}
|
||||||
|
|
||||||
|
/**
 * React hook that builds the collapsible "Quality Control" section of the demo UI.
 *
 * The section consists of a header row with a "show" checkbox and, when that
 * checkbox is ticked, four setting rows:
 *  - noise handling checkboxes (echo cancel, suppression1, suppression2),
 *  - input / output gain sliders (0.0 to 1.0 in steps of 0.1),
 *  - a selector for the server-side F0 detector,
 *  - a selector for the server-side recordIO flag (0 or 1).
 *
 * Each row is memoized on exactly the values and setters it reads, so a row is
 * rebuilt only when one of its own inputs changes.
 *
 * @param props - carries the shared `clientState`; its `clientSetting` and
 *                `serverSetting` members supply the current values and setters.
 * @returns an object whose `qualityControl` member is the JSX for the section.
 */
export const useQualityControl = (props: UseQualityControlProps): QualityControlState => {
    const { clientSetting, serverSetting } = props.clientState
    const [showQualityControl, setShowQualityControl] = useState<boolean>(false)

    // Three independent checkboxes bound to the client-side input processing flags.
    const noiseControlRow = useMemo(() => {
        return (
            <div className="body-row split-3-2-2-2-1 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">Noise Suppression</div>
                <div>
                    <input type="checkbox" checked={clientSetting.setting.echoCancel} onChange={(e) => {
                        clientSetting.setEchoCancel(e.target.checked)
                    }} /> echo cancel
                </div>
                <div>
                    <input type="checkbox" checked={clientSetting.setting.noiseSuppression} onChange={(e) => {
                        clientSetting.setNoiseSuppression(e.target.checked)
                    }} /> suppression1
                </div>
                <div>
                    <input type="checkbox" checked={clientSetting.setting.noiseSuppression2} onChange={(e) => {
                        clientSetting.setNoiseSuppression2(e.target.checked)
                    }} /> suppression2
                </div>
                <div className="body-button-container">
                </div>
            </div>
        )
    }, [
        clientSetting.setting.echoCancel, clientSetting.setEchoCancel,
        clientSetting.setting.noiseSuppression, clientSetting.setNoiseSuppression,
        clientSetting.setting.noiseSuppression2, clientSetting.setNoiseSuppression2,
    ])

    // Input and output gain sliders, each followed by the current numeric value.
    const gainControlRow = useMemo(() => {
        return (
            <div className="body-row split-3-2-2-3 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">Gain Control</div>
                <div>
                    <span className="body-item-input-slider-label">in</span>
                    <input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={clientSetting.setting.inputGain} onChange={(e) => {
                        clientSetting.setInputGain(Number(e.target.value))
                    }}></input>
                    <span className="body-item-input-slider-val">{clientSetting.setting.inputGain}</span>
                </div>
                <div>
                    <span className="body-item-input-slider-label">out</span>
                    <input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={clientSetting.setting.outputGain} onChange={(e) => {
                        clientSetting.setOutputGain(Number(e.target.value))
                    }}></input>
                    <span className="body-item-input-slider-val">{clientSetting.setting.outputGain}</span>
                </div>
                <div className="body-button-container">
                </div>
            </div>
        )
    }, [
        // setInputGain must be listed: the row closes over it, and omitting it
        // would leave the slider calling an outdated setter after it changes.
        clientSetting.setting.inputGain, clientSetting.setInputGain,
        clientSetting.setting.outputGain, clientSetting.setOutputGain,
    ])

    // Selector for the F0 detector; each option shows the detector name and a short hint.
    const f0DetectorRow = useMemo(() => {
        // Typed as a string-keyed record so the lookup below needs no ts-ignore.
        const desc: Record<string, string> = { "harvest": "High Quality", "dio": "Light Weight" }
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">F0 Detector</div>
                <div className="body-select-container">
                    <select className="body-select" value={serverSetting.setting.f0Detector} onChange={(e) => {
                        serverSetting.setF0Detector(e.target.value as F0Detector)
                    }}>
                        {
                            Object.values(F0Detector).map(x => {
                                return <option key={x} value={x}>{x}({desc[x]})</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [serverSetting.setting.f0Detector, serverSetting.setF0Detector])

    // Selector for the recordIO server setting; the two choices are the numbers 0 and 1.
    const recordIORow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">recordIO</div>
                <div className="body-select-container">
                    <select className="body-select" value={serverSetting.setting.recordIO} onChange={(e) => {
                        serverSetting.setRecordIO(Number(e.target.value))
                    }}>
                        {
                            [0, 1].map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [serverSetting.setting.recordIO, serverSetting.setRecordIO])

    // The rows are rendered only while the section is expanded.
    const qualityControlContent = useMemo(() => {
        if (!showQualityControl) return <></>
        return (
            <>
                {noiseControlRow}
                {gainControlRow}
                {f0DetectorRow}
                {recordIORow}
            </>
        )
    }, [showQualityControl, gainControlRow, noiseControlRow, f0DetectorRow, recordIORow])

    // Header row with the expand/collapse checkbox, followed by the (possibly empty) content.
    const qualityControl = useMemo(() => {
        return (
            <>
                <div className="body-row split-3-7 left-padding-1">
                    <div className="body-sub-section-title">Quality Control</div>
                    <div>
                        <input type="checkbox" checked={showQualityControl} onChange={(e) => {
                            setShowQualityControl(e.target.checked)
                        }} /> show
                    </div>
                </div>
                {qualityControlContent}
            </>
        )
    }, [showQualityControl, qualityControlContent])

    return {
        qualityControl,
    }
}
|
||||||
|
|
||||||
|
|
@ -388,7 +388,16 @@ body {
|
|||||||
display: flex;
|
display: flex;
|
||||||
}
|
}
|
||||||
.body-item-input {
|
.body-item-input {
|
||||||
width: 90%;
|
width: 60%;
|
||||||
|
}
|
||||||
|
.body-item-input-slider {
|
||||||
|
width: 60%;
|
||||||
|
}
|
||||||
|
.body-item-input-slider-label {
|
||||||
|
margin-right: 1rem;
|
||||||
|
}
|
||||||
|
.body-item-input-slider-val {
|
||||||
|
margin-left: 1rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.body-button-container {
|
.body-button-container {
|
||||||
|
@ -115,8 +115,10 @@ export class VoiceChangerClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// forceVfDisable is for the condition that vf is enabled in constructor.
|
// forceVfDisable is for the condition that vf is enabled in constructor.
|
||||||
setup = async (input: string | MediaStream, bufferSize: BufferSize, forceVfDisable: boolean = false) => {
|
//noiseSuppression2 => VoiceFocus
|
||||||
|
setup = async (input: string | MediaStream, bufferSize: BufferSize, echoCancel: boolean = true, noiseSuppression: boolean = true, noiseSuppression2: boolean = false) => {
|
||||||
const lockNum = await this.lock()
|
const lockNum = await this.lock()
|
||||||
|
console.log(`Input Setup=> echo: ${echoCancel}, noise1: ${noiseSuppression}, noise2: ${noiseSuppression2}`)
|
||||||
// condition check
|
// condition check
|
||||||
if (!this.vcNode) {
|
if (!this.vcNode) {
|
||||||
console.warn("vc node is not initialized.")
|
console.warn("vc node is not initialized.")
|
||||||
@ -136,10 +138,16 @@ export class VoiceChangerClient {
|
|||||||
channelCount: 1,
|
channelCount: 1,
|
||||||
sampleRate: 48000,
|
sampleRate: 48000,
|
||||||
sampleSize: 16,
|
sampleSize: 16,
|
||||||
// echoCancellation: false,
|
autoGainControl: false,
|
||||||
// noiseSuppression: false
|
echoCancellation: echoCancel,
|
||||||
|
noiseSuppression: noiseSuppression
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
// this.currentMediaStream.getAudioTracks().forEach((x) => {
|
||||||
|
// console.log("MIC Setting(cap)", x.getCapabilities())
|
||||||
|
// console.log("MIC Setting(const)", x.getConstraints())
|
||||||
|
// console.log("MIC Setting(setting)", x.getSettings())
|
||||||
|
// })
|
||||||
} else {
|
} else {
|
||||||
this.currentMediaStream = input
|
this.currentMediaStream = input
|
||||||
}
|
}
|
||||||
@ -160,14 +168,13 @@ export class VoiceChangerClient {
|
|||||||
this.inputGainNode = this.ctx.createGain()
|
this.inputGainNode = this.ctx.createGain()
|
||||||
this.inputGainNode.gain.value = this.inputGain
|
this.inputGainNode.gain.value = this.inputGain
|
||||||
this.currentMediaStreamAudioSourceNode.connect(this.inputGainNode)
|
this.currentMediaStreamAudioSourceNode.connect(this.inputGainNode)
|
||||||
if (this.currentDevice && forceVfDisable == false) {
|
if (this.currentDevice && noiseSuppression2) {
|
||||||
this.currentDevice.chooseNewInnerDevice(this.currentMediaStream)
|
this.currentDevice.chooseNewInnerDevice(this.currentMediaStream)
|
||||||
const voiceFocusNode = await this.currentDevice.createAudioNode(this.ctx); // vf node
|
const voiceFocusNode = await this.currentDevice.createAudioNode(this.ctx); // vf node
|
||||||
this.inputGainNode.connect(voiceFocusNode.start) // input node -> vf node
|
this.inputGainNode.connect(voiceFocusNode.start) // input node -> vf node
|
||||||
voiceFocusNode.end.connect(this.outputNodeFromVF!)
|
voiceFocusNode.end.connect(this.outputNodeFromVF!)
|
||||||
this.micStream.setStream(this.outputNodeFromVF!.stream) // vf node -> mic stream
|
this.micStream.setStream(this.outputNodeFromVF!.stream) // vf node -> mic stream
|
||||||
} else {
|
} else {
|
||||||
console.log("VF disabled")
|
|
||||||
const inputDestinationNodeForMicStream = this.ctx.createMediaStreamDestination()
|
const inputDestinationNodeForMicStream = this.ctx.createMediaStreamDestination()
|
||||||
this.inputGainNode.connect(inputDestinationNodeForMicStream)
|
this.inputGainNode.connect(inputDestinationNodeForMicStream)
|
||||||
this.micStream.setStream(inputDestinationNodeForMicStream.stream) // input device -> mic stream
|
this.micStream.setStream(inputDestinationNodeForMicStream.stream) // input device -> mic stream
|
||||||
|
@ -23,6 +23,8 @@ export type VoiceChangerServerSetting = {
|
|||||||
onnxExecutionProvider: OnnxExecutionProvider,
|
onnxExecutionProvider: OnnxExecutionProvider,
|
||||||
|
|
||||||
f0Factor: number
|
f0Factor: number
|
||||||
|
f0Detector: string // dio or harvest
|
||||||
|
recordIO: number // 0:off, 1:on
|
||||||
}
|
}
|
||||||
|
|
||||||
export type VoiceChangerClientSetting = {
|
export type VoiceChangerClientSetting = {
|
||||||
@ -34,7 +36,9 @@ export type VoiceChangerClientSetting = {
|
|||||||
inputChunkNum: number, // n of (256 x n) for send buffer
|
inputChunkNum: number, // n of (256 x n) for send buffer
|
||||||
speakers: Speaker[],
|
speakers: Speaker[],
|
||||||
correspondences: Correspondence[],
|
correspondences: Correspondence[],
|
||||||
forceVfDisable: boolean,
|
echoCancel: boolean,
|
||||||
|
noiseSuppression: boolean,
|
||||||
|
noiseSuppression2: boolean,
|
||||||
voiceChangerMode: VoiceChangerMode,
|
voiceChangerMode: VoiceChangerMode,
|
||||||
downSamplingMode: DownSamplingMode,
|
downSamplingMode: DownSamplingMode,
|
||||||
|
|
||||||
@ -75,6 +79,8 @@ export type ServerInfo = {
|
|||||||
framework: Framework,
|
framework: Framework,
|
||||||
onnxExecutionProvider: string[]
|
onnxExecutionProvider: string[]
|
||||||
f0Factor: number
|
f0Factor: number
|
||||||
|
f0Detector: string
|
||||||
|
recordIO: number
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -130,6 +136,12 @@ export const Framework = {
|
|||||||
}
|
}
|
||||||
export type Framework = typeof Framework[keyof typeof Framework]
|
export type Framework = typeof Framework[keyof typeof Framework]
|
||||||
|
|
||||||
|
/**
 * Names of the F0 detectors that can be selected ("dio" or "harvest").
 *
 * Declared `as const` so that the derived `F0Detector` type below is the
 * literal union `"dio" | "harvest"` rather than plain `string`.
 */
export const F0Detector = {
    "dio": "dio",
    "harvest": "harvest",
} as const
/** Union of the values of the `F0Detector` object. */
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
|
||||||
|
|
||||||
export const ServerSettingKey = {
|
export const ServerSettingKey = {
|
||||||
"srcId": "srcId",
|
"srcId": "srcId",
|
||||||
"dstId": "dstId",
|
"dstId": "dstId",
|
||||||
@ -141,7 +153,9 @@ export const ServerSettingKey = {
|
|||||||
"crossFadeOverlapRate": "crossFadeOverlapRate",
|
"crossFadeOverlapRate": "crossFadeOverlapRate",
|
||||||
"framework": "framework",
|
"framework": "framework",
|
||||||
"onnxExecutionProvider": "onnxExecutionProvider",
|
"onnxExecutionProvider": "onnxExecutionProvider",
|
||||||
"f0Factor": "f0Factor"
|
"f0Factor": "f0Factor",
|
||||||
|
"f0Detector": "f0Detector",
|
||||||
|
"recordIO": "recordIO"
|
||||||
} as const
|
} as const
|
||||||
export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
|
export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
|
||||||
|
|
||||||
@ -158,8 +172,9 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
|
|||||||
crossFadeOverlapRate: 0.5,
|
crossFadeOverlapRate: 0.5,
|
||||||
framework: "PyTorch",
|
framework: "PyTorch",
|
||||||
f0Factor: 1.0,
|
f0Factor: 1.0,
|
||||||
onnxExecutionProvider: "CPUExecutionProvider"
|
onnxExecutionProvider: "CPUExecutionProvider",
|
||||||
|
f0Detector: "dio",
|
||||||
|
recordIO: 0
|
||||||
}
|
}
|
||||||
|
|
||||||
export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
|
export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
|
||||||
@ -192,7 +207,9 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
correspondences: [],
|
correspondences: [],
|
||||||
forceVfDisable: false,
|
echoCancel: true,
|
||||||
|
noiseSuppression: true,
|
||||||
|
noiseSuppression2: false,
|
||||||
voiceChangerMode: "realtime",
|
voiceChangerMode: "realtime",
|
||||||
downSamplingMode: "average",
|
downSamplingMode: "average",
|
||||||
inputGain: 1.0,
|
inputGain: 1.0,
|
||||||
|
@ -17,7 +17,9 @@ export type ClientSettingState = {
|
|||||||
setProtocol: (proto: Protocol) => void;
|
setProtocol: (proto: Protocol) => void;
|
||||||
setAudioInput: (audioInput: string | MediaStream | null) => Promise<void>
|
setAudioInput: (audioInput: string | MediaStream | null) => Promise<void>
|
||||||
setBufferSize: (bufferSize: BufferSize) => Promise<void>
|
setBufferSize: (bufferSize: BufferSize) => Promise<void>
|
||||||
setVfForceDisabled: (vfForceDisabled: boolean) => Promise<void>
|
setEchoCancel: (voiceFocus: boolean) => Promise<void>
|
||||||
|
setNoiseSuppression: (voiceFocus: boolean) => Promise<void>
|
||||||
|
setNoiseSuppression2: (voiceFocus: boolean) => Promise<void>
|
||||||
setInputChunkNum: (num: number) => void;
|
setInputChunkNum: (num: number) => void;
|
||||||
setVoiceChangerMode: (mode: VoiceChangerMode) => void
|
setVoiceChangerMode: (mode: VoiceChangerMode) => void
|
||||||
setDownSamplingMode: (mode: DownSamplingMode) => void
|
setDownSamplingMode: (mode: DownSamplingMode) => void
|
||||||
@ -110,15 +112,13 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
|
|||||||
|
|
||||||
const _setInput = async () => {
|
const _setInput = async () => {
|
||||||
if (!props.voiceChangerClient) return
|
if (!props.voiceChangerClient) return
|
||||||
// console.log("[useClient] setup!(0)", settingRef.current.audioInput)
|
|
||||||
if (!settingRef.current.audioInput || settingRef.current.audioInput == "none") {
|
if (!settingRef.current.audioInput || settingRef.current.audioInput == "none") {
|
||||||
// console.log("[useClient] setup!(1)", settingRef.current.audioInput)
|
// console.log("[useClient] setup!(1)", settingRef.current.audioInput)
|
||||||
const ms = createDummyMediaStream(props.audioContext!)
|
const ms = createDummyMediaStream(props.audioContext!)
|
||||||
await props.voiceChangerClient.setup(ms, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
|
await props.voiceChangerClient.setup(ms, settingRef.current.bufferSize, settingRef.current.echoCancel, settingRef.current.noiseSuppression, settingRef.current.noiseSuppression2)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// console.log("[useClient] setup!(2)", settingRef.current.audioInput)
|
// console.log("[useClient] setup!(2)", settingRef.current.audioInput)
|
||||||
await props.voiceChangerClient.setup(settingRef.current.audioInput, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
|
await props.voiceChangerClient.setup(settingRef.current.audioInput, settingRef.current.bufferSize, settingRef.current.echoCancel, settingRef.current.noiseSuppression, settingRef.current.noiseSuppression2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,10 +140,28 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
|
|||||||
}
|
}
|
||||||
}, [props.voiceChangerClient])
|
}, [props.voiceChangerClient])
|
||||||
|
|
||||||
const setVfForceDisabled = useMemo(() => {
|
const setEchoCancel = useMemo(() => {
|
||||||
return async (vfForceDisabled: boolean) => {
|
return async (val: boolean) => {
|
||||||
if (!props.voiceChangerClient) return
|
if (!props.voiceChangerClient) return
|
||||||
settingRef.current.forceVfDisable = vfForceDisabled
|
settingRef.current.echoCancel = val
|
||||||
|
await _setInput()
|
||||||
|
setSetting({ ...settingRef.current })
|
||||||
|
}
|
||||||
|
}, [props.voiceChangerClient])
|
||||||
|
|
||||||
|
const setNoiseSuppression = useMemo(() => {
|
||||||
|
return async (val: boolean) => {
|
||||||
|
if (!props.voiceChangerClient) return
|
||||||
|
settingRef.current.noiseSuppression = val
|
||||||
|
await _setInput()
|
||||||
|
setSetting({ ...settingRef.current })
|
||||||
|
}
|
||||||
|
}, [props.voiceChangerClient])
|
||||||
|
|
||||||
|
const setNoiseSuppression2 = useMemo(() => {
|
||||||
|
return async (val: boolean) => {
|
||||||
|
if (!props.voiceChangerClient) return
|
||||||
|
settingRef.current.noiseSuppression2 = val
|
||||||
await _setInput()
|
await _setInput()
|
||||||
setSetting({ ...settingRef.current })
|
setSetting({ ...settingRef.current })
|
||||||
}
|
}
|
||||||
@ -271,7 +289,9 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
|
|||||||
setProtocol,
|
setProtocol,
|
||||||
setAudioInput,
|
setAudioInput,
|
||||||
setBufferSize,
|
setBufferSize,
|
||||||
setVfForceDisabled,
|
setEchoCancel,
|
||||||
|
setNoiseSuppression,
|
||||||
|
setNoiseSuppression2,
|
||||||
setInputChunkNum,
|
setInputChunkNum,
|
||||||
setVoiceChangerMode,
|
setVoiceChangerMode,
|
||||||
setDownSamplingMode,
|
setDownSamplingMode,
|
||||||
|
@ -49,6 +49,8 @@ export type ServerSettingState = {
|
|||||||
setCrossFadeEndRate: (num: number) => Promise<boolean>;
|
setCrossFadeEndRate: (num: number) => Promise<boolean>;
|
||||||
setCrossFadeOverlapRate: (num: number) => Promise<boolean>;
|
setCrossFadeOverlapRate: (num: number) => Promise<boolean>;
|
||||||
setF0Factor: (num: number) => Promise<boolean>;
|
setF0Factor: (num: number) => Promise<boolean>;
|
||||||
|
setF0Detector: (val: string) => Promise<boolean>;
|
||||||
|
setRecordIO: (num: number) => Promise<boolean>;
|
||||||
reloadServerInfo: () => Promise<void>;
|
reloadServerInfo: () => Promise<void>;
|
||||||
setFileUploadSetting: (val: FileUploadSetting) => void
|
setFileUploadSetting: (val: FileUploadSetting) => void
|
||||||
loadModel: () => Promise<void>
|
loadModel: () => Promise<void>
|
||||||
@ -97,6 +99,9 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeEndRate, "" + setting.crossFadeEndRate)
|
props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeEndRate, "" + setting.crossFadeEndRate)
|
||||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, "" + setting.crossFadeOverlapRate)
|
props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, "" + setting.crossFadeOverlapRate)
|
||||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Factor, "" + setting.f0Factor)
|
props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Factor, "" + setting.f0Factor)
|
||||||
|
props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Detector, "" + setting.f0Detector)
|
||||||
|
props.voiceChangerClient.updateServerSettings(ServerSettingKey.recordIO, "" + setting.recordIO)
|
||||||
|
|
||||||
|
|
||||||
}, [props.voiceChangerClient])
|
}, [props.voiceChangerClient])
|
||||||
|
|
||||||
@ -123,7 +128,10 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
crossFadeOverlapRate: res.crossFadeOverlapRate,
|
crossFadeOverlapRate: res.crossFadeOverlapRate,
|
||||||
framework: res.framework,
|
framework: res.framework,
|
||||||
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
||||||
f0Factor: res.f0Factor
|
f0Factor: res.f0Factor,
|
||||||
|
f0Detector: res.f0Detector,
|
||||||
|
recordIO: res.recordIO
|
||||||
|
|
||||||
}
|
}
|
||||||
_setSetting(newSetting)
|
_setSetting(newSetting)
|
||||||
setItem(INDEXEDDB_KEY_SERVER, newSetting)
|
setItem(INDEXEDDB_KEY_SERVER, newSetting)
|
||||||
@ -199,6 +207,17 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
return await _set_and_store(ServerSettingKey.f0Factor, "" + num)
|
return await _set_and_store(ServerSettingKey.f0Factor, "" + num)
|
||||||
}
|
}
|
||||||
}, [props.voiceChangerClient])
|
}, [props.voiceChangerClient])
|
||||||
|
|
||||||
|
const setF0Detector = useMemo(() => {
|
||||||
|
return async (val: string) => {
|
||||||
|
return await _set_and_store(ServerSettingKey.f0Detector, "" + val)
|
||||||
|
}
|
||||||
|
}, [props.voiceChangerClient])
|
||||||
|
const setRecordIO = useMemo(() => {
|
||||||
|
return async (num: number) => {
|
||||||
|
return await _set_and_store(ServerSettingKey.recordIO, "" + num)
|
||||||
|
}
|
||||||
|
}, [props.voiceChangerClient])
|
||||||
//////////////
|
//////////////
|
||||||
// 操作
|
// 操作
|
||||||
/////////////
|
/////////////
|
||||||
@ -337,7 +356,9 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
crossFadeOverlapRate: res.crossFadeOverlapRate,
|
crossFadeOverlapRate: res.crossFadeOverlapRate,
|
||||||
framework: res.framework,
|
framework: res.framework,
|
||||||
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
||||||
f0Factor: res.f0Factor
|
f0Factor: res.f0Factor,
|
||||||
|
f0Detector: res.f0Detector,
|
||||||
|
recordIO: res.recordIO
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}, [props.voiceChangerClient])
|
}, [props.voiceChangerClient])
|
||||||
@ -364,6 +385,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
setCrossFadeEndRate,
|
setCrossFadeEndRate,
|
||||||
setCrossFadeOverlapRate,
|
setCrossFadeOverlapRate,
|
||||||
setF0Factor,
|
setF0Factor,
|
||||||
|
setF0Detector,
|
||||||
|
setRecordIO,
|
||||||
reloadServerInfo,
|
reloadServerInfo,
|
||||||
setFileUploadSetting,
|
setFileUploadSetting,
|
||||||
loadModel,
|
loadModel,
|
||||||
|
@ -16,6 +16,9 @@ NATIVE_CLIENT_FILE_MAC = os.path.join(sys._MEIPASS, "voice-changer-native-client
|
|||||||
"voice-changer-native-client") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"
|
"voice-changer-native-client") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"
|
||||||
|
|
||||||
|
|
||||||
|
TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
|
||||||
|
os.makedirs(TMP_DIR, exist_ok=True)
|
||||||
|
|
||||||
# SSL_KEY_DIR = os.path.join(sys._MEIPASS, "keys") if hasattr(sys, "_MEIPASS") else "keys"
|
# SSL_KEY_DIR = os.path.join(sys._MEIPASS, "keys") if hasattr(sys, "_MEIPASS") else "keys"
|
||||||
# MODEL_DIR = os.path.join(sys._MEIPASS, "logs") if hasattr(sys, "_MEIPASS") else "logs"
|
# MODEL_DIR = os.path.join(sys._MEIPASS, "logs") if hasattr(sys, "_MEIPASS") else "logs"
|
||||||
# UPLOAD_DIR = os.path.join(sys._MEIPASS, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
|
# UPLOAD_DIR = os.path.join(sys._MEIPASS, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
|
||||||
|
@ -9,7 +9,7 @@ from restapi.MMVC_Rest_Hello import MMVC_Rest_Hello
|
|||||||
from restapi.MMVC_Rest_VoiceChanger import MMVC_Rest_VoiceChanger
|
from restapi.MMVC_Rest_VoiceChanger import MMVC_Rest_VoiceChanger
|
||||||
from restapi.MMVC_Rest_Fileuploader import MMVC_Rest_Fileuploader
|
from restapi.MMVC_Rest_Fileuploader import MMVC_Rest_Fileuploader
|
||||||
from restapi.MMVC_Rest_Trainer import MMVC_Rest_Trainer
|
from restapi.MMVC_Rest_Trainer import MMVC_Rest_Trainer
|
||||||
from const import frontend_path
|
from const import frontend_path, TMP_DIR
|
||||||
|
|
||||||
|
|
||||||
class ValidationErrorLoggingRoute(APIRoute):
|
class ValidationErrorLoggingRoute(APIRoute):
|
||||||
@ -27,6 +27,7 @@ class ValidationErrorLoggingRoute(APIRoute):
|
|||||||
|
|
||||||
return custom_route_handler
|
return custom_route_handler
|
||||||
|
|
||||||
|
|
||||||
class MMVC_Rest:
|
class MMVC_Rest:
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -50,6 +51,8 @@ class MMVC_Rest:
|
|||||||
|
|
||||||
app_fastapi.mount(
|
app_fastapi.mount(
|
||||||
"/recorder", StaticFiles(directory=f'{frontend_path}', html=True), name="static")
|
"/recorder", StaticFiles(directory=f'{frontend_path}', html=True), name="static")
|
||||||
|
app_fastapi.mount(
|
||||||
|
"/tmp", StaticFiles(directory=f'{TMP_DIR}'), name="static")
|
||||||
|
|
||||||
restHello = MMVC_Rest_Hello()
|
restHello = MMVC_Rest_Hello()
|
||||||
app_fastapi.include_router(restHello.router)
|
app_fastapi.include_router(restHello.router)
|
||||||
|
@ -4,6 +4,7 @@ import numpy as np
|
|||||||
import socketio
|
import socketio
|
||||||
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
from voice_changer.VoiceChangerManager import VoiceChangerManager
|
||||||
|
|
||||||
|
|
||||||
class MMVC_Namespace(socketio.AsyncNamespace):
|
class MMVC_Namespace(socketio.AsyncNamespace):
|
||||||
def __init__(self, namespace: str, voiceChangerManager: VoiceChangerManager):
|
def __init__(self, namespace: str, voiceChangerManager: VoiceChangerManager):
|
||||||
super().__init__(namespace)
|
super().__init__(namespace)
|
||||||
@ -36,4 +37,3 @@ class MMVC_Namespace(socketio.AsyncNamespace):
|
|||||||
def on_disconnect(self, sid):
|
def on_disconnect(self, sid):
|
||||||
# print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
|
# print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
2
server/tmp_dir/.gitignore
vendored
Normal file
2
server/tmp_dir/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
@ -1,4 +1,4 @@
|
|||||||
from const import ERROR_NO_ONNX_SESSION
|
from const import ERROR_NO_ONNX_SESSION, TMP_DIR
|
||||||
import torch
|
import torch
|
||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
@ -84,15 +84,17 @@ class VocieChangerSettings():
|
|||||||
minConvertSize: int = 0
|
minConvertSize: int = 0
|
||||||
framework: str = "PyTorch" # PyTorch or ONNX
|
framework: str = "PyTorch" # PyTorch or ONNX
|
||||||
f0Factor: float = 1.0
|
f0Factor: float = 1.0
|
||||||
|
f0Detector: str = "dio" # dio or harvest
|
||||||
|
recordIO: int = 1 # 0:off, 1:on
|
||||||
|
|
||||||
pyTorchModelFile: str = ""
|
pyTorchModelFile: str = ""
|
||||||
onnxModelFile: str = ""
|
onnxModelFile: str = ""
|
||||||
configFile: str = ""
|
configFile: str = ""
|
||||||
|
|
||||||
# ↓mutableな物だけ列挙
|
# ↓mutableな物だけ列挙
|
||||||
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize"]
|
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO"]
|
||||||
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
|
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
|
||||||
strData = ["framework"]
|
strData = ["framework", "f0Detector"]
|
||||||
|
|
||||||
|
|
||||||
class VoiceChanger():
|
class VoiceChanger():
|
||||||
@ -113,15 +115,26 @@ class VoiceChanger():
|
|||||||
self.prev_audio = np.zeros(1)
|
self.prev_audio = np.zeros(1)
|
||||||
self.mps_enabled = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
self.mps_enabled = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
||||||
|
|
||||||
mock_stream = MockStream(24000)
|
self._setupRecordIO()
|
||||||
mock_stream.open_outputfile("out.wav")
|
|
||||||
self.out = mock_stream
|
|
||||||
mock_stream_in = MockStream(24000)
|
|
||||||
mock_stream_in.open_outputfile("in.wav")
|
|
||||||
self.stream_in = mock_stream_in
|
|
||||||
|
|
||||||
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
|
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
|
||||||
|
|
||||||
|
def _setupRecordIO(self):
|
||||||
|
# IO Recorder Setup
|
||||||
|
mock_stream_out = MockStream(24000)
|
||||||
|
stream_output_file = os.path.join(TMP_DIR, "out.wav")
|
||||||
|
if os.path.exists(stream_output_file):
|
||||||
|
os.unlink(stream_output_file)
|
||||||
|
mock_stream_out.open_outputfile(stream_output_file)
|
||||||
|
self.stream_out = mock_stream_out
|
||||||
|
|
||||||
|
mock_stream_in = MockStream(24000)
|
||||||
|
stream_input_file = os.path.join(TMP_DIR, "in.wav")
|
||||||
|
if os.path.exists(stream_input_file):
|
||||||
|
os.unlink(stream_input_file)
|
||||||
|
mock_stream_in.open_outputfile(stream_input_file)
|
||||||
|
self.stream_in = mock_stream_in
|
||||||
|
|
||||||
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None):
|
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None):
|
||||||
self.settings.configFile = config
|
self.settings.configFile = config
|
||||||
self.hps = get_hparams_from_file(config)
|
self.hps = get_hparams_from_file(config)
|
||||||
@ -200,6 +213,8 @@ class VoiceChanger():
|
|||||||
self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
|
self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
|
||||||
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
|
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
|
||||||
self.unpackedData_length = 0
|
self.unpackedData_length = 0
|
||||||
|
if key == "recordIO" and val == 1:
|
||||||
|
self._setupRecordIO()
|
||||||
elif key in self.settings.floatData:
|
elif key in self.settings.floatData:
|
||||||
setattr(self.settings, key, float(val))
|
setattr(self.settings, key, float(val))
|
||||||
elif key in self.settings.strData:
|
elif key in self.settings.strData:
|
||||||
@ -256,10 +271,11 @@ class VoiceChanger():
|
|||||||
|
|
||||||
# TBD: numpy <--> pytorch変換が行ったり来たりしているが、まずは動かすことを最優先。
|
# TBD: numpy <--> pytorch変換が行ったり来たりしているが、まずは動かすことを最優先。
|
||||||
audio_norm_np = audio_norm.squeeze().numpy().astype(np.float64)
|
audio_norm_np = audio_norm.squeeze().numpy().astype(np.float64)
|
||||||
|
if self.settings.f0Detector == "dio":
|
||||||
_f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
|
_f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
|
||||||
f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
|
f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
|
||||||
# print("type:", audio_norm_np.dtype)
|
else:
|
||||||
# f0, t = pw.harvest(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
|
f0, t = pw.harvest(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
|
||||||
f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
|
f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
|
||||||
f0 = torch.from_numpy(f0.astype(np.float32))
|
f0 = torch.from_numpy(f0.astype(np.float32))
|
||||||
|
|
||||||
@ -280,7 +296,7 @@ class VoiceChanger():
|
|||||||
f0_factor=self.settings.f0Factor
|
f0_factor=self.settings.f0Factor
|
||||||
)([(spec, sid, f0)])
|
)([(spec, sid, f0)])
|
||||||
|
|
||||||
return data
|
return data, f0.numpy()
|
||||||
|
|
||||||
def _onnx_inference(self, data, inputSize):
|
def _onnx_inference(self, data, inputSize):
|
||||||
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
if hasattr(self, "onnx_session") == False or self.onnx_session == None:
|
||||||
@ -401,7 +417,6 @@ class VoiceChanger():
|
|||||||
|
|
||||||
def on_request(self, unpackedData: any):
|
def on_request(self, unpackedData: any):
|
||||||
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
|
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
|
||||||
self.stream_in.write(unpackedData.astype(np.int16).tobytes())
|
|
||||||
# print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
|
# print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
|
||||||
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
|
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
|
||||||
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
|
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
|
||||||
@ -412,7 +427,8 @@ class VoiceChanger():
|
|||||||
# convertSize = 8192
|
# convertSize = 8192
|
||||||
|
|
||||||
self._generate_strength(unpackedData)
|
self._generate_strength(unpackedData)
|
||||||
data = self._generate_input(unpackedData, convertSize)
|
# f0はデバッグ用
|
||||||
|
data, f0 = self._generate_input(unpackedData, convertSize)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if self.settings.framework == "ONNX":
|
if self.settings.framework == "ONNX":
|
||||||
@ -431,6 +447,9 @@ class VoiceChanger():
|
|||||||
|
|
||||||
result = result.astype(np.int16)
|
result = result.astype(np.int16)
|
||||||
# print("on_request result size:",result.shape)
|
# print("on_request result size:",result.shape)
|
||||||
|
if self.settings.recordIO == 1:
|
||||||
|
self.stream_in.write(unpackedData.astype(np.int16).tobytes())
|
||||||
|
self.stream_out.write(result.tobytes())
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user