WIP: support v1.5.x, add recording output function 3

This commit is contained in:
wataru 2023-02-12 17:07:28 +09:00
parent 305dac7e38
commit affc55cb24
10 changed files with 175 additions and 29 deletions

5
.gitignore vendored
View File

@ -7,6 +7,10 @@ server/MMVC_Trainer/
server/MMVC_Client/
server/keys
server/info
server/in.wav
server/out.wav
server/G_*.pth
server/train_config.json
server/memo.md
@ -15,3 +19,4 @@ client/lib/worklet/dist
# client/demo/dist/ # demo用に残す
docker/cudnn/

File diff suppressed because one or more lines are too long

View File

@ -29,13 +29,13 @@ const reloadDevices = async () => {
toJSON: () => { }
})
const audioOutputs = mediaDeviceInfos.filter(x => { return x.kind == "audiooutput" })
audioOutputs.push({
deviceId: "record",
groupId: "record",
kind: "audiooutput",
label: "record",
toJSON: () => { }
})
// audioOutputs.push({
// deviceId: "record",
// groupId: "record",
// kind: "audiooutput",
// label: "record",
// toJSON: () => { }
// })
return [audioInputs, audioOutputs]
}
export type UseDeviceSettingProps = {
@ -57,6 +57,8 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
const audioSrcNode = useRef<MediaElementAudioSourceNode>()
const [outputRecordingStarted, setOutputRecordingStarted] = useState<boolean>(false)
useEffect(() => {
const initialize = async () => {
const audioInfo = await reloadDevices()
@ -195,6 +197,36 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
)
}, [outputAudioDeviceInfo, audioOutputForGUI])
// GUI row with start/stop buttons that control recording of the output via the worklet setting.
const audioOutputRecordingRow = useMemo(() => {
    // The guard that hid this row unless audioOutputForGUI was "record" is disabled,
    // so the row is always rendered:
    //   if (audioOutputForGUI != "record") { return <></> }

    // Flip the UI state first, then forward the request to the worklet setting.
    const handleStartClick = async () => {
        setOutputRecordingStarted(true)
        await props.clientState.workletSetting.startOutputRecording()
    }
    const handleStopClick = async () => {
        setOutputRecordingStarted(false)
        await props.clientState.workletSetting.stopOutputRecording()
    }

    // While recording, "start" carries the active class and "stop" the standby class; otherwise the reverse.
    const [startClassName, stopClassName] = outputRecordingStarted
        ? ["body-button-active", "body-button-stanby"]
        : ["body-button-stanby", "body-button-active"]

    return (
        <div className="body-row split-3-3-4 left-padding-1 guided">
            <div className="body-item-title left-padding-2">output record</div>
            <div className="body-button-container">
                <div onClick={handleStartClick} className={startClassName}>start</div>
                <div onClick={handleStopClick} className={stopClassName}>stop</div>
            </div>
            <div className="body-input-container"></div>
        </div>
    )
}, [audioOutputForGUI, outputRecordingStarted, props.clientState.workletSetting.startOutputRecording, props.clientState.workletSetting.stopOutputRecording])
useEffect(() => {
[AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_ORIGINAL, AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK].forEach(x => {
const audio = document.getElementById(x) as HTMLAudioElement
@ -204,17 +236,23 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
audio.setSinkId("")
if (x == AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK) {
audio.volume = fileInputEchoback ? 1 : 0
} else {
audio.volume = 1
}
} else if (audioOutputForGUI == "record") {
audio.volume = 0
} else {
// @ts-ignore
audio.setSinkId(audioOutputForGUI)
if (x == AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK) {
audio.volume = fileInputEchoback ? 1 : 0
} else {
audio.volume = 1
}
}
}
})
}, [audioOutputForGUI, audioInputForGUI])
}, [audioOutputForGUI])
useEffect(() => {
@ -248,9 +286,75 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
{audioInputRow}
{audioMediaInputRow}
{audioOutputRow}
{audioOutputRecordingRow}
</>
)
}, [audioInputRow, audioMediaInputRow, audioOutputRow])
}, [audioInputRow, audioMediaInputRow, audioOutputRow, audioOutputRecordingRow])
// Runs when recorded output data (sent from the worklet) is stored in client state:
// encodes it as a 16-bit PCM mono WAV file and triggers a download named "output.wav".
useEffect(() => {
    const recordedChunks = props.clientState.outputRecordData
    if (!recordedChunks || recordedChunks.length == 0) {
        return
    }

    // Layout constants for the canonical 44-byte PCM WAV header.
    const HEADER_BYTES = 44
    const CHANNELS = 1
    const BYTES_PER_SAMPLE = 2
    // NOTE(review): the sample rate is hard-coded; presumably it matches the rate
    // at which the worklet produces the recorded samples — confirm.
    const SAMPLE_RATE = 48000
    const BLOCK_ALIGN = CHANNELS * BYTES_PER_SAMPLE

    // Flatten the recorded chunks into one contiguous sample buffer.
    const sampleCount = recordedChunks.reduce((total, chunk) => total + chunk.length, 0)
    const samples = new Float32Array(sampleCount)
    let writeOffset = 0
    for (const chunk of recordedChunks) {
        samples.set(chunk, writeOffset)
        writeOffset += chunk.length
    }

    // Write an ASCII tag (e.g. "RIFF") byte by byte at the given offset.
    const writeString = (view: DataView, offset: number, text: string) => {
        for (let i = 0; i < text.length; i++) {
            view.setUint8(offset + i, text.charCodeAt(i))
        }
    }

    // Clamp each float sample to [-1, 1] and store it as little-endian signed 16-bit PCM.
    const floatTo16BitPCM = (output: DataView, offset: number, input: Float32Array) => {
        for (let i = 0; i < input.length; i++, offset += 2) {
            const s = Math.max(-1, Math.min(1, input[i]))
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true)
        }
    }

    const dataBytes = samples.length * BYTES_PER_SAMPLE
    const buffer = new ArrayBuffer(HEADER_BYTES + dataBytes)
    const view = new DataView(buffer)

    // Header layout reference: https://www.youfit.co.jp/archives/1418
    writeString(view, 0, 'RIFF')                              // RIFF header
    // Size of everything after this field = total file size - 8 = 36 + dataBytes.
    // (Previously written as 32 + dataBytes, which under-reported the size by 4 bytes.)
    view.setUint32(4, HEADER_BYTES - 8 + dataBytes, true)
    writeString(view, 8, 'WAVE')                              // WAVE header
    writeString(view, 12, 'fmt ')                             // fmt chunk
    view.setUint32(16, 16, true)                              // fmt chunk byte count
    view.setUint16(20, 1, true)                               // format ID (1 = linear PCM)
    view.setUint16(22, CHANNELS, true)                        // number of channels
    view.setUint32(24, SAMPLE_RATE, true)                     // sampling rate
    view.setUint32(28, SAMPLE_RATE * BLOCK_ALIGN, true)       // byte rate
    view.setUint16(32, BLOCK_ALIGN, true)                     // block size
    view.setUint16(34, BYTES_PER_SAMPLE * 8, true)            // bits per sample
    writeString(view, 36, 'data')                             // data chunk
    view.setUint32(40, dataBytes, true)                       // waveform data byte count
    floatTo16BitPCM(view, HEADER_BYTES, samples)              // waveform data

    // Offer the encoded file to the user through a temporary anchor element.
    const audioBlob = new Blob([view], { type: 'audio/wav' })
    const url = URL.createObjectURL(audioBlob)
    const a = document.createElement("a")
    a.href = url
    a.download = `output.wav`
    document.body.appendChild(a)
    a.click()
    document.body.removeChild(a)
    URL.revokeObjectURL(url)
}, [props.clientState.outputRecordData])
return {
deviceSetting,

View File

@ -214,7 +214,8 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
</div>
</div>
<div className="body-row split-3-7 left-padding-1 guided">
{/* v.1.5.xより Silent skipは廃止 */}
{/* <div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Trancate Vol</div>
<div className="body-input-container">
<input type="number" min={0.0001} max={0.0009} step={0.0001} value={props.clientState.workletSetting.setting.volTrancateThreshold} onChange={(e) => {
@ -235,7 +236,7 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
})
}} />
</div>
</div>
</div> */}
</>
)
}, [props.clientState.workletSetting.setting, props.clientState.workletSetting.setSetting])

View File

@ -1,4 +1,4 @@
import { VoiceChangerWorkletNode, VolumeListener } from "./VoiceChangerWorkletNode";
import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceChangerWorkletNode";
// @ts-ignore
import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
@ -65,7 +65,7 @@ export class VoiceChangerClient {
}
}
constructor(ctx: AudioContext, vfEnable: boolean, audioStreamerListeners: AudioStreamerListeners, volumeListener: VolumeListener) {
constructor(ctx: AudioContext, vfEnable: boolean, audioStreamerListeners: AudioStreamerListeners, voiceChangerWorkletListener: VoiceChangerWorkletListener) {
this.sem.enqueue(0);
this.configurator = new ServerConfigurator()
this.ctx = ctx
@ -74,7 +74,7 @@ export class VoiceChangerClient {
const scriptUrl = URL.createObjectURL(new Blob([workerjs], { type: "text/javascript" }));
await this.ctx.audioWorklet.addModule(scriptUrl)
this.vcNode = new VoiceChangerWorkletNode(this.ctx, volumeListener); // vc node
this.vcNode = new VoiceChangerWorkletNode(this.ctx, voiceChangerWorkletListener); // vc node
this.currentMediaStreamAudioDestinationNode = this.ctx.createMediaStreamDestination() // output node
this.vcNode.connect(this.currentMediaStreamAudioDestinationNode) // vc node -> output node
// (vc nodeにはaudio streamerのcallbackでデータが投げ込まれる)
@ -125,7 +125,11 @@ export class VoiceChangerClient {
}
if (typeof input == "string") {
this.currentMediaStream = await navigator.mediaDevices.getUserMedia({
audio: { deviceId: input }
audio: {
deviceId: input,
// echoCancellation: false,
// noiseSuppression: false
}
})
} else {
this.currentMediaStream = input
@ -227,10 +231,10 @@ export class VoiceChangerClient {
this.vcNode.configure(setting)
}
/**
 * Forwards a start-output-recording request to the voice changer worklet node (`vcNode`).
 * Declared as an arrow-function property, so `this` stays bound when passed as a callback.
 */
startOutputRecordingWorklet = () => {
this.vcNode.startOutputRecordingWorklet()
}
/**
 * Forwards a stop-output-recording request to the voice changer worklet node (`vcNode`).
 * Declared as an arrow-function property, so `this` stays bound when passed as a callback.
 */
stopOutputRecordingWorklet = () => {
this.vcNode.stopOutputRecordingWorklet()
}

View File

@ -1,13 +1,14 @@
import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";
import { WorkletSetting } from "./const";
export type VolumeListener = {
export type VoiceChangerWorkletListener = {
notifyVolume: (vol: number) => void
notifyOutputRecordData: (data: Float32Array[]) => void
}
export class VoiceChangerWorkletNode extends AudioWorkletNode {
private listener: VolumeListener
constructor(context: AudioContext, listener: VolumeListener) {
private listener: VoiceChangerWorkletListener
constructor(context: AudioContext, listener: VoiceChangerWorkletListener) {
super(context, "voice-changer-worklet-processor");
this.port.onmessage = this.handleMessage.bind(this);
this.listener = listener
@ -30,7 +31,7 @@ export class VoiceChangerWorkletNode extends AudioWorkletNode {
if (event.data.responseType === "volume") {
this.listener.notifyVolume(event.data.volume as number)
} else if (event.data.responseType === "recordData") {
this.listener.notifyOutputRecordData(event.data.recordData as Float32Array[])
} else {
console.warn(`[worklet_node][voice-changer-worklet-processor] unknown response ${event.data.responseType}`, event.data)
}

View File

@ -17,6 +17,7 @@ export type ClientState = {
bufferingTime: number;
responseTime: number;
volume: number;
outputRecordData: Float32Array[] | null;
getInfo: () => Promise<void>
clearSetting: () => Promise<void>
@ -47,6 +48,7 @@ export const useClient = (props: UseClientProps): ClientState => {
const [bufferingTime, setBufferingTime] = useState<number>(0)
const [responseTime, setResponseTime] = useState<number>(0)
const [volume, setVolume] = useState<number>(0)
const [outputRecordData, setOutputRecordData] = useState<Float32Array[] | null>(null)
// (1-4) エラーステータス
@ -79,6 +81,9 @@ export const useClient = (props: UseClientProps): ClientState => {
}, {
notifyVolume: (vol: number) => {
setVolume(vol)
},
notifyOutputRecordData: (data: Float32Array[]) => {
setOutputRecordData(data)
}
})
@ -116,6 +121,7 @@ export const useClient = (props: UseClientProps): ClientState => {
bufferingTime,
responseTime,
volume,
outputRecordData,
getInfo,

View File

@ -11,6 +11,8 @@ export type WorkletSettingState = {
setting: WorkletSetting;
clearSetting: () => Promise<void>
setSetting: (setting: WorkletSetting) => void;
startOutputRecording: () => void
stopOutputRecording: () => Promise<void>
}
export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSettingState => {
@ -64,9 +66,27 @@ export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSetting
const clearSetting = async () => {
await removeItem(INDEXEDDB_KEY_WORKLET)
}
// Memoized callback that asks the voice changer client to start output recording.
// It does nothing while no client is available.
const startOutputRecording = useMemo(
    () => () => {
        if (props.voiceChangerClient) {
            props.voiceChangerClient.startOutputRecordingWorklet()
        }
    },
    [props.voiceChangerClient]
)
// Memoized async callback that asks the voice changer client to stop output recording.
// It resolves immediately without doing anything while no client is available.
const stopOutputRecording = useMemo(
    () => async () => {
        if (props.voiceChangerClient) {
            props.voiceChangerClient.stopOutputRecordingWorklet()
        }
    },
    [props.voiceChangerClient]
)
return {
setting,
clearSetting,
setSetting
setSetting,
startOutputRecording,
stopOutputRecording
}
}

View File

@ -100,6 +100,8 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
const float = (x >= 0x8000) ? -(0x10000 - x) / 0x8000 : x / 0x7FFF;
f32Data[i] = float
})
// console.log("[worklet] i16Data", i16Data)
// console.log("[worklet] f32Data", f32Data)
if (this.playBuffer.length > this.numTrancateTreshold) {
console.log("[worklet] Buffer truncated")
@ -156,9 +158,11 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
}
// V.1.5.0よりsilent skipで音飛びするようになったので無効化
if (this.volTrancateCount < this.volTrancateLength || this.volTrancateLength < 0) {
break
} else {
break
// console.log("silent...skip")
}
}

View File

@ -399,9 +399,9 @@ class VoiceChanger():
result = result.cpu().float().numpy()
return result
def on_request_(self, unpackedData: any):
def on_request(self, unpackedData: any):
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
self.stream_in.write(unpackedData.astype(np.int16).tobytes())
# self.stream_in.write(unpackedData.astype(np.int16).tobytes())
# print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
@ -464,7 +464,7 @@ class VoiceChanger():
# signal = signal.astype(np.int16).tobytes()
return signal
def on_request(self, unpackedData: any):
def on_request_(self, unpackedData: any):
self._generate_strength(unpackedData)
@ -546,5 +546,6 @@ class VoiceChanger():
self.prev_audio = audio
self.out.write(audio)
self.stream_in.write(unpackedData.tobytes())
# print(audio1)
return audio1