WIP: support v1.5.x, add recording output function 3

wataru 2023-02-12 17:07:28 +09:00
parent 305dac7e38
commit affc55cb24
10 changed files with 175 additions and 29 deletions

.gitignore

@@ -7,6 +7,10 @@ server/MMVC_Trainer/
 server/MMVC_Client/
 server/keys
 server/info
+server/in.wav
+server/out.wav
+server/G_*.pth
+server/train_config.json
 server/memo.md
@@ -15,3 +19,4 @@ client/lib/worklet/dist
 # client/demo/dist/ # keep for the demo
 docker/cudnn/

File diff suppressed because one or more lines are too long


@@ -29,13 +29,13 @@ const reloadDevices = async () => {
         toJSON: () => { }
     })
     const audioOutputs = mediaDeviceInfos.filter(x => { return x.kind == "audiooutput" })
-    audioOutputs.push({
-        deviceId: "record",
-        groupId: "record",
-        kind: "audiooutput",
-        label: "record",
-        toJSON: () => { }
-    })
+    // audioOutputs.push({
+    //     deviceId: "record",
+    //     groupId: "record",
+    //     kind: "audiooutput",
+    //     label: "record",
+    //     toJSON: () => { }
+    // })
     return [audioInputs, audioOutputs]
 }
 export type UseDeviceSettingProps = {
@@ -57,6 +57,8 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
     const audioSrcNode = useRef<MediaElementAudioSourceNode>()
+    const [outputRecordingStarted, setOutputRecordingStarted] = useState<boolean>(false)
     useEffect(() => {
         const initialize = async () => {
             const audioInfo = await reloadDevices()
@@ -195,6 +197,36 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
         )
     }, [outputAudioDeviceInfo, audioOutputForGUI])
+    const audioOutputRecordingRow = useMemo(() => {
+        // if (audioOutputForGUI != "record") {
+        //     return <></>
+        // }
+        const onOutputRecordStartClicked = async () => {
+            setOutputRecordingStarted(true)
+            await props.clientState.workletSetting.startOutputRecording()
+        }
+        const onOutputRecordStopClicked = async () => {
+            setOutputRecordingStarted(false)
+            await props.clientState.workletSetting.stopOutputRecording()
+        }
+        const startClassName = outputRecordingStarted ? "body-button-active" : "body-button-stanby"
+        const stopClassName = outputRecordingStarted ? "body-button-stanby" : "body-button-active"
+        return (
+            <div className="body-row split-3-3-4 left-padding-1 guided">
+                <div className="body-item-title left-padding-2">output record</div>
+                <div className="body-button-container">
+                    <div onClick={onOutputRecordStartClicked} className={startClassName}>start</div>
+                    <div onClick={onOutputRecordStopClicked} className={stopClassName}>stop</div>
+                </div>
+                <div className="body-input-container">
+                </div>
+            </div>
+        )
+    }, [audioOutputForGUI, outputRecordingStarted, props.clientState.workletSetting.startOutputRecording, props.clientState.workletSetting.stopOutputRecording])
     useEffect(() => {
         [AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_ORIGINAL, AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK].forEach(x => {
             const audio = document.getElementById(x) as HTMLAudioElement
@@ -204,17 +236,23 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
                 audio.setSinkId("")
                 if (x == AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK) {
                     audio.volume = fileInputEchoback ? 1 : 0
+                } else {
+                    audio.volume = 1
                 }
+            } else if (audioOutputForGUI == "record") {
+                audio.volume = 0
             } else {
                 // @ts-ignore
                 audio.setSinkId(audioOutputForGUI)
                 if (x == AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK) {
                     audio.volume = fileInputEchoback ? 1 : 0
+                } else {
+                    audio.volume = 1
                 }
             }
         }
     })
-    }, [audioOutputForGUI, audioInputForGUI])
+    }, [audioOutputForGUI])
     useEffect(() => {
@@ -248,9 +286,75 @@ export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDe
             {audioInputRow}
             {audioMediaInputRow}
             {audioOutputRow}
+            {audioOutputRecordingRow}
         </>
     )
-    }, [audioInputRow, audioMediaInputRow, audioOutputRow])
+    }, [audioInputRow, audioMediaInputRow, audioOutputRow, audioOutputRecordingRow])
+    // Runs once the output recording data (from the worklet) has been stored
+    useEffect(() => {
+        if (!props.clientState.outputRecordData || props.clientState.outputRecordData?.length == 0) {
+            return
+        }
+        const f32Datas = props.clientState.outputRecordData
+        const sampleSize = f32Datas.reduce((prev, cur) => {
+            return prev + cur.length
+        }, 0)
+        const samples = new Float32Array(sampleSize);
+        let sampleIndex = 0
+        for (let i = 0; i < f32Datas.length; i++) {
+            for (let j = 0; j < f32Datas[i].length; j++) {
+                samples[sampleIndex] = f32Datas[i][j];
+                sampleIndex++;
+            }
+        }
+        const writeString = (view: DataView, offset: number, string: string) => {
+            for (var i = 0; i < string.length; i++) {
+                view.setUint8(offset + i, string.charCodeAt(i));
+            }
+        };
+        const floatTo16BitPCM = (output: DataView, offset: number, input: Float32Array) => {
+            for (var i = 0; i < input.length; i++, offset += 2) {
+                var s = Math.max(-1, Math.min(1, input[i]));
+                output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
+            }
+        };
+        const buffer = new ArrayBuffer(44 + samples.length * 2);
+        const view = new DataView(buffer);
+        // https://www.youfit.co.jp/archives/1418
+        writeString(view, 0, 'RIFF');                       // RIFF header
+        view.setUint32(4, 32 + samples.length * 2, true);   // file size from this point on
+        writeString(view, 8, 'WAVE');                       // WAVE header
+        writeString(view, 12, 'fmt ');                      // fmt chunk
+        view.setUint32(16, 16, true);                       // byte count of the fmt chunk
+        view.setUint16(20, 1, true);                        // format ID
+        view.setUint16(22, 1, true);                        // number of channels
+        view.setUint32(24, 48000, true);                    // sampling rate
+        view.setUint32(28, 48000 * 2, true);                // data rate (bytes/sec)
+        view.setUint16(32, 2, true);                        // block size
+        view.setUint16(34, 16, true);                       // bits per sample
+        writeString(view, 36, 'data');                      // data chunk
+        view.setUint32(40, samples.length * 2, true);       // byte count of the waveform data
+        floatTo16BitPCM(view, 44, samples);                 // waveform data
+        const audioBlob = new Blob([view], { type: 'audio/wav' });
+        const url = URL.createObjectURL(audioBlob);
+        const a = document.createElement("a");
+        a.href = url;
+        a.download = `output.wav`;
+        document.body.appendChild(a);
+        a.click();
+        document.body.removeChild(a);
+        URL.revokeObjectURL(url);
+    }, [props.clientState.outputRecordData])
     return {
         deviceSetting,
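The effect above hand-writes the 44-byte RIFF/WAVE header for mono 16-bit PCM at 48 kHz. A minimal sketch of the same encoding, factored into a standalone helper (`encodeWav` is a hypothetical name, not part of the diff; note the spec value for the size field at offset 4 is 36 + data bytes, while the code above writes 32 + data bytes, which most decoders still accept):

```typescript
// Sketch, assuming mono 16-bit linear PCM; sampleRate is a parameter
// instead of the hard-coded 48000 used in the diff.
const encodeWav = (samples: Float32Array, sampleRate: number): Blob => {
    const bytesPerSample = 2
    const dataSize = samples.length * bytesPerSample
    const buffer = new ArrayBuffer(44 + dataSize)
    const view = new DataView(buffer)
    const writeString = (offset: number, s: string) => {
        for (let i = 0; i < s.length; i++) view.setUint8(offset + i, s.charCodeAt(i))
    }
    writeString(0, "RIFF")
    view.setUint32(4, 36 + dataSize, true)                  // chunk size: bytes after this field
    writeString(8, "WAVE")
    writeString(12, "fmt ")
    view.setUint32(16, 16, true)                            // fmt chunk size
    view.setUint16(20, 1, true)                             // format ID: 1 = linear PCM
    view.setUint16(22, 1, true)                             // channels: mono
    view.setUint32(24, sampleRate, true)                    // sampling rate
    view.setUint32(28, sampleRate * bytesPerSample, true)   // byte rate
    view.setUint16(32, bytesPerSample, true)                // block align (channels * bytes/sample)
    view.setUint16(34, 16, true)                            // bits per sample
    writeString(36, "data")
    view.setUint32(40, dataSize, true)                      // waveform byte count
    for (let i = 0, offset = 44; i < samples.length; i++, offset += 2) {
        const s = Math.max(-1, Math.min(1, samples[i]))     // clamp to [-1, 1]
        view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true)
    }
    return new Blob([view], { type: "audio/wav" })
}
```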


@@ -214,7 +214,8 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
                 </div>
             </div>
-            <div className="body-row split-3-7 left-padding-1 guided">
+            {/* Silent skip is removed as of v1.5.x */}
+            {/* <div className="body-row split-3-7 left-padding-1 guided">
                 <div className="body-item-title left-padding-1">Trancate Vol</div>
                 <div className="body-input-container">
                     <input type="number" min={0.0001} max={0.0009} step={0.0001} value={props.clientState.workletSetting.setting.volTrancateThreshold} onChange={(e) => {
@@ -235,7 +236,7 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
                     })
                 }} />
             </div>
-        </div>
+        </div> */}
         </>
     )
 }, [props.clientState.workletSetting.setting, props.clientState.workletSetting.setSetting])


@@ -1,4 +1,4 @@
-import { VoiceChangerWorkletNode, VolumeListener } from "./VoiceChangerWorkletNode";
+import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceChangerWorkletNode";
 // @ts-ignore
 import workerjs from "raw-loader!../worklet/dist/index.js";
 import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
@@ -65,7 +65,7 @@ export class VoiceChangerClient {
         }
     }
-    constructor(ctx: AudioContext, vfEnable: boolean, audioStreamerListeners: AudioStreamerListeners, volumeListener: VolumeListener) {
+    constructor(ctx: AudioContext, vfEnable: boolean, audioStreamerListeners: AudioStreamerListeners, voiceChangerWorkletListener: VoiceChangerWorkletListener) {
         this.sem.enqueue(0);
         this.configurator = new ServerConfigurator()
         this.ctx = ctx
@@ -74,7 +74,7 @@
         const scriptUrl = URL.createObjectURL(new Blob([workerjs], { type: "text/javascript" }));
         await this.ctx.audioWorklet.addModule(scriptUrl)
-        this.vcNode = new VoiceChangerWorkletNode(this.ctx, volumeListener); // vc node
+        this.vcNode = new VoiceChangerWorkletNode(this.ctx, voiceChangerWorkletListener); // vc node
         this.currentMediaStreamAudioDestinationNode = this.ctx.createMediaStreamDestination() // output node
         this.vcNode.connect(this.currentMediaStreamAudioDestinationNode) // vc node -> output node
         // (data is pushed into the vc node by the audio streamer callback)
@@ -125,7 +125,11 @@
     }
     if (typeof input == "string") {
         this.currentMediaStream = await navigator.mediaDevices.getUserMedia({
-            audio: { deviceId: input }
+            audio: {
+                deviceId: input,
+                // echoCancellation: false,
+                // noiseSuppression: false
+            }
         })
     } else {
         this.currentMediaStream = input
@@ -227,10 +231,10 @@
     this.vcNode.configure(setting)
 }
 startOutputRecordingWorklet = () => {
+    this.vcNode.startOutputRecordingWorklet()
 }
 stopOutputRecordingWorklet = () => {
+    this.vcNode.stopOutputRecordingWorklet()
 }
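The commented-out constraints above hint at turning off the browser's input DSP, which can distort the signal fed to a voice changer. For reference, a sketch of opening an input with all three standard preprocessing stages disabled (`openInputDevice` is a hypothetical helper; the diff deliberately leaves these commented out, so the browser defaults still apply):

```typescript
// Sketch only. echoCancellation, noiseSuppression, and autoGainControl are
// standard MediaTrackConstraints and default to true in browsers.
const openInputDevice = async (deviceId: string): Promise<MediaStream> => {
    return navigator.mediaDevices.getUserMedia({
        audio: {
            deviceId: deviceId,
            echoCancellation: false,
            noiseSuppression: false,
            autoGainControl: false,
        },
    })
}
```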


@@ -1,13 +1,14 @@
 import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";
 import { WorkletSetting } from "./const";
-export type VolumeListener = {
+export type VoiceChangerWorkletListener = {
     notifyVolume: (vol: number) => void
+    notifyOutputRecordData: (data: Float32Array[]) => void
 }
 export class VoiceChangerWorkletNode extends AudioWorkletNode {
-    private listener: VolumeListener
-    constructor(context: AudioContext, listener: VolumeListener) {
+    private listener: VoiceChangerWorkletListener
+    constructor(context: AudioContext, listener: VoiceChangerWorkletListener) {
         super(context, "voice-changer-worklet-processor");
         this.port.onmessage = this.handleMessage.bind(this);
         this.listener = listener
@@ -30,7 +31,7 @@ export class VoiceChangerWorkletNode extends AudioWorkletNode {
         if (event.data.responseType === "volume") {
             this.listener.notifyVolume(event.data.volume as number)
+        } else if (event.data.responseType === "recordData") {
+            this.listener.notifyOutputRecordData(event.data.recordData as Float32Array[])
         } else {
             console.warn(`[worklet_node][voice-changer-worklet-processor] unknown response ${event.data.responseType}`, event.data)
         }
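The node now fans worklet messages out to a single listener object instead of a volume-only callback. A sketch of the same dispatch with the message shapes made explicit as a discriminated union (`WorkletResponse` and `dispatchWorkletResponse` are assumptions inferred from the two `responseType` values handled above, not the full protocol):

```typescript
export type VoiceChangerWorkletListener = {
    notifyVolume: (vol: number) => void
    notifyOutputRecordData: (data: Float32Array[]) => void
}

// Assumed message shapes, inferred from the handler in the diff.
type WorkletResponse =
    | { responseType: "volume"; volume: number }
    | { responseType: "recordData"; recordData: Float32Array[] }

const dispatchWorkletResponse = (listener: VoiceChangerWorkletListener, msg: WorkletResponse) => {
    switch (msg.responseType) {
        case "volume":
            listener.notifyVolume(msg.volume)
            break
        case "recordData":
            listener.notifyOutputRecordData(msg.recordData)
            break
    }
}
```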


@@ -17,6 +17,7 @@ export type ClientState = {
     bufferingTime: number;
     responseTime: number;
     volume: number;
+    outputRecordData: Float32Array[] | null;
     getInfo: () => Promise<void>
     clearSetting: () => Promise<void>
@@ -47,6 +48,7 @@ export const useClient = (props: UseClientProps): ClientState => {
     const [bufferingTime, setBufferingTime] = useState<number>(0)
     const [responseTime, setResponseTime] = useState<number>(0)
     const [volume, setVolume] = useState<number>(0)
+    const [outputRecordData, setOutputRecordData] = useState<Float32Array[] | null>(null)
     // (1-4) error status
@@ -79,6 +81,9 @@ export const useClient = (props: UseClientProps): ClientState => {
     }, {
         notifyVolume: (vol: number) => {
             setVolume(vol)
+        },
+        notifyOutputRecordData: (data: Float32Array[]) => {
+            setOutputRecordData(data)
         }
     })
@@ -116,6 +121,7 @@ export const useClient = (props: UseClientProps): ClientState => {
     bufferingTime,
     responseTime,
     volume,
+    outputRecordData,
     getInfo,


@@ -11,6 +11,8 @@ export type WorkletSettingState = {
     setting: WorkletSetting;
     clearSetting: () => Promise<void>
     setSetting: (setting: WorkletSetting) => void;
+    startOutputRecording: () => void
+    stopOutputRecording: () => Promise<void>
 }
 export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSettingState => {
@@ -64,9 +66,27 @@ export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSetting
     const clearSetting = async () => {
         await removeItem(INDEXEDDB_KEY_WORKLET)
     }
+    const startOutputRecording = useMemo(() => {
+        return () => {
+            if (!props.voiceChangerClient) return
+            props.voiceChangerClient.startOutputRecordingWorklet()
+        }
+    }, [props.voiceChangerClient])
+    const stopOutputRecording = useMemo(() => {
+        return async () => {
+            if (!props.voiceChangerClient) return
+            props.voiceChangerClient.stopOutputRecordingWorklet()
+        }
+    }, [props.voiceChangerClient])
     return {
         setting,
         clearSetting,
-        setSetting
+        setSetting,
+        startOutputRecording,
+        stopOutputRecording
     }
 }
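`useMemo(() => fn, deps)` returning a function, as used above, is equivalent to `useCallback(fn, deps)`. The same two controls written that way, as a sketch (`RecordingClient` and `useOutputRecordingControls` are hypothetical names narrowed to what the hook needs):

```typescript
import { useCallback } from "react"

// Hypothetical minimal surface of the voice changer client, for illustration.
type RecordingClient = {
    startOutputRecordingWorklet: () => void
    stopOutputRecordingWorklet: () => void
}

export const useOutputRecordingControls = (client: RecordingClient | null) => {
    // Stable callbacks: identity only changes when the client instance changes.
    const startOutputRecording = useCallback(() => {
        client?.startOutputRecordingWorklet()
    }, [client])
    const stopOutputRecording = useCallback(() => {
        client?.stopOutputRecordingWorklet()
    }, [client])
    return { startOutputRecording, stopOutputRecording }
}
```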


@@ -100,6 +100,8 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
             const float = (x >= 0x8000) ? -(0x10000 - x) / 0x8000 : x / 0x7FFF;
             f32Data[i] = float
         })
+        // console.log("[worklet] i16Data", i16Data)
+        // console.log("[worklet] f32Data", f32Data)
         if (this.playBuffer.length > this.numTrancateTreshold) {
             console.log("[worklet] Buffer truncated")
@@ -156,9 +158,11 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
         }
+        // Disabled: silent skip started causing audio dropouts as of v1.5.0
         if (this.volTrancateCount < this.volTrancateLength || this.volTrancateLength < 0) {
             break
         } else {
+            break
             // console.log("silent...skip")
         }
     }
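The processor side of `startOutputRecordingWorklet` / `stopOutputRecordingWorklet` is not shown in this diff. A sketch of the buffering it implies, assuming request kinds that mirror the `recordData` response handled by the node (the class name, `requestType` values, and field names are all assumptions):

```typescript
// Sketch only: buffers each 128-sample render quantum while recording and
// posts the accumulated chunks back when recording stops.
class OutputRecordingSketchProcessor extends AudioWorkletProcessor {
    private isRecording = false
    private recordedChunks: Float32Array[] = []

    constructor() {
        super()
        this.port.onmessage = (event: MessageEvent) => {
            if (event.data.requestType === "startOutputRecording") {
                this.recordedChunks = []
                this.isRecording = true
            } else if (event.data.requestType === "stopOutputRecording") {
                this.isRecording = false
                this.port.postMessage({ responseType: "recordData", recordData: this.recordedChunks })
            }
        }
    }

    process(_inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
        const out = outputs[0][0]
        // ... converted samples would be written into `out` here ...
        if (this.isRecording) {
            this.recordedChunks.push(out.slice()) // copy: the quantum buffer is reused by the engine
        }
        return true
    }
}
registerProcessor("output-recording-sketch-processor", OutputRecordingSketchProcessor)
```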


@@ -399,9 +399,9 @@ class VoiceChanger():
         result = result.cpu().float().numpy()
         return result
-    def on_request_(self, unpackedData: any):
+    def on_request(self, unpackedData: any):
         convertSize = self.settings.convertChunkNum * 128  # 128 samples / 1 chunk
-        self.stream_in.write(unpackedData.astype(np.int16).tobytes())
+        # self.stream_in.write(unpackedData.astype(np.int16).tobytes())
         # print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
         if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
             convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
@@ -464,7 +464,7 @@ class VoiceChanger():
         # signal = signal.astype(np.int16).tobytes()
         return signal
-    def on_request(self, unpackedData: any):
+    def on_request_(self, unpackedData: any):
         self._generate_strength(unpackedData)
@@ -546,5 +546,6 @@ class VoiceChanger():
         self.prev_audio = audio
         self.out.write(audio)
         self.stream_in.write(unpackedData.tobytes())
+        # print(audio1)
         return audio1