WIP silent skip

wataru 2023-01-12 03:49:22 +09:00
parent fdb8e30382
commit ccd5111fd2
9 changed files with 94 additions and 23 deletions

File diff suppressed because one or more lines are too long

View File

@@ -389,7 +389,7 @@ export const useClient = (props: UseClientProps): ClientState => {
srcId: serverInfo.srcId,
dstId: serverInfo.dstId,
framework: serverInfo.framework,
onnxExecutionProvider: serverInfo.providers.length > 0 ? serverInfo.providers[0] as OnnxExecutionProvider : "CPUExecutionProvider"
onnxExecutionProvider: !!serverInfo.providers && serverInfo.providers.length > 0 ? serverInfo.providers[0] as OnnxExecutionProvider : "CPUExecutionProvider"
})
} else {
setDisplaySettingState({

View File

@@ -0,0 +1,12 @@
export declare const RequestType: {
readonly voice: "voice";
readonly config: "config";
};
export type RequestType = typeof RequestType[keyof typeof RequestType];
export type VoiceChangerWorkletProcessorRequest = {
requestType: RequestType;
voice: ArrayBuffer;
numTrancateTreshold: number;
volTrancateThreshold: number;
volTrancateLength: number;
};
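As a usage illustration of the declaration above (not part of this commit; the handleRequest name is hypothetical), a consumer can branch on requestType:

import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";

// Hypothetical helper, only to illustrate the request shape.
const handleRequest = (req: VoiceChangerWorkletProcessorRequest) => {
    if (req.requestType === "voice") {
        // "voice" requests carry int16 PCM in an ArrayBuffer.
        console.log(`voice chunk: ${req.voice.byteLength} bytes`);
    } else {
        // "config" requests carry the truncation / silence thresholds.
        console.log(`truncate above ${req.numTrancateTreshold} chunks, silence threshold ${req.volTrancateThreshold}`);
    }
};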

View File

@@ -7,7 +7,7 @@ import { BufferSize, DefaultVoiceChangerOptions, Protocol, ServerSettingKey, Voi
import MicrophoneStream from "microphone-stream";
import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
import { ServerConfigurator } from "./ServerConfigurator";
import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";
// Audio data flow
// input node (mic or MediaStream) -> [vf node] -> microphone stream -> audio streamer ->
@@ -39,7 +39,15 @@ export class VoiceChnagerClient {
onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer): void => {
// console.log(voiceChangerMode, data)
if (voiceChangerMode === "realtime") {
this.vcNode.postReceivedVoice(data)
const req: VoiceChangerWorkletProcessorRequest = {
requestType: "voice",
voice: data,
numTrancateTreshold: 0,
volTrancateThreshold: 0,
volTrancateLength: 0
}
this.vcNode.postReceivedVoice(req)
return
}
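The callback above leaves the three threshold fields at 0, since the worklet only reads them from "config" requests. A minimal sketch of what posting such a config message could look like (hypothetical: this commit only posts "voice" requests, the import paths are assumed from this package's layout, and the values mirror the worklet's defaults):

import { VoiceChangerWorkletNode } from "./VoiceChangerWorkletNode";
import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";

// Hypothetical sketch: configure the worklet's truncation / silence thresholds.
const sendConfig = (vcNode: VoiceChangerWorkletNode) => {
    const configReq: VoiceChangerWorkletProcessorRequest = {
        requestType: "config",
        voice: new ArrayBuffer(0),    // unused for "config"
        numTrancateTreshold: 50,      // trim the play buffer once it grows past 50 chunks
        volTrancateThreshold: 0.0005, // RMS level treated as silence
        volTrancateLength: 32         // consecutive silent chunks before output is skipped
    }
    vcNode.postReceivedVoice(configReq)
}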

View File

@@ -1,3 +1,5 @@
import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";
export type VolumeListener = {
notifyVolume: (vol: number) => void
}
@@ -11,10 +13,10 @@ export class VoiceChangerWorkletNode extends AudioWorkletNode {
console.log(`[worklet_node][voice-changer-worklet-processor] created.`);
}
postReceivedVoice = (data: ArrayBuffer) => {
postReceivedVoice = (req: VoiceChangerWorkletProcessorRequest) => {
this.port.postMessage({
data: data,
}, [data]);
request: req
}, [req.voice]);
}
handleMessage(event: any) {
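Note that postReceivedVoice passes [req.voice] as postMessage's transfer list, so the audio buffer is moved to the worklet thread rather than copied. A standalone sketch of that transfer behaviour, using a plain MessageChannel in place of the worklet port:

// After postMessage(..., [buf]) the sender's ArrayBuffer is detached (zero-copy handoff).
const { port1 } = new MessageChannel()
const voice = new ArrayBuffer(256)
port1.postMessage({ request: { requestType: "voice", voice } }, [voice])
console.log(voice.byteLength) // 0: ownership moved to the receiver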

View File

@@ -1,7 +1,7 @@
// (★1) The chunk size is defined as 128 samples = 256 bytes (int16).
// (★2) 256 bytes (the sample count thinned out from the minimum buffer size of 256, x 2 bytes) are managed as one chunk.
// 24000 samples -> 1 sec, 128 samples (1 chunk) -> 5.333 msec
// types
export type VoiceChangerRequestParamas = {
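A quick check of the numbers in the chunk-size comments above (sample rate 24000 Hz as stated there):

// Chunk arithmetic from the comments: 128 samples of int16 audio.
const SAMPLE_RATE = 24000
const CHUNK_SAMPLES = 128
const chunkBytes = CHUNK_SAMPLES * 2                 // 2 bytes per int16 sample = 256 bytes
const chunkMs = (CHUNK_SAMPLES / SAMPLE_RATE) * 1000 // 128 / 24000 s = 5.333... msec
console.log(chunkBytes, chunkMs.toFixed(3))          // 256 "5.333"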

View File

@@ -29,6 +29,6 @@
"skipLibCheck": true
},
/* tsc */
"include": ["src/*.ts"],
"include": ["src/**/*.ts"],
"exclude": ["node_modules"]
}

View File

@@ -2,6 +2,8 @@
"compilerOptions": {
"target": "ES2020",
"lib":["ES2020"],
"outDir": "./worklet/dist",
"declaration": true,
/* */
"forceConsistentCasingInFileNames": true,

View File

@@ -1,8 +1,26 @@
export const RequestType = {
"voice": "voice",
"config": "config"
} as const
export type RequestType = typeof RequestType[keyof typeof RequestType]
export type VoiceChangerWorkletProcessorRequest = {
requestType: RequestType,
voice: ArrayBuffer,
numTrancateTreshold: number
volTrancateThreshold: number
volTrancateLength: number
}
class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
private BLOCK_SIZE = 128
private initialized = false;
private volume = 0
private numTrancateTreshold = 50
private volTrancateThreshold = 0.0005
private volTrancateLength = 32
private volTrancateCount = 0
playBuffer: Float32Array[] = []
/**
* @constructor
@@ -13,9 +31,25 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
this.port.onmessage = this.handleMessage.bind(this);
}
calcVol = (data: Float32Array, prevVol: number) => {
const sum = data.reduce((prev, cur) => {
return prev + cur * cur
}, 0)
const rms = Math.sqrt(sum / data.length)
return Math.max(rms, prevVol * 0.95)
}
handleMessage(event: any) {
// noop
const arrayBuffer = event.data.data as ArrayBuffer
const request = event.data.request as VoiceChangerWorkletProcessorRequest
if (request.requestType === "config") {
this.numTrancateTreshold = request.numTrancateTreshold
this.volTrancateLength = request.volTrancateLength
this.volTrancateThreshold = request.volTrancateThreshold
console.log("[worklet] worklet configured", request)
return
}
const arrayBuffer = request.voice
// Data is received as int16
const i16Data = new Int16Array(arrayBuffer)
const f32Data = new Float32Array(i16Data.length)
@@ -25,7 +59,7 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
f32Data[i] = float
})
if (this.playBuffer.length > 50) {
if (this.playBuffer.length > this.numTrancateTreshold) {
console.log("[worklet] Buffer truncated")
while (this.playBuffer.length > 2) {
this.playBuffer.shift()
@@ -58,23 +92,36 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
}
if (this.playBuffer.length === 0) {
// console.log("[worklet] no play buffer")
console.log("[worklet] no play buffer")
return true
}
const data = this.playBuffer.shift()!
const sum = data.reduce((prev, cur) => {
return prev + cur * cur
}, 0)
const rms = Math.sqrt(sum / data.length)
this.volume = Math.max(rms, this.volume * 0.95)
this.port.postMessage({ volume: this.volume });
//// Skip when silence has continued for a certain period.
let voice: Float32Array | undefined
while (true) {
voice = this.playBuffer.shift()
if (!voice) {
break
}
this.volume = this.calcVol(voice, this.volume)
if (this.volume < this.volTrancateThreshold) {
this.volTrancateCount += 1
} else {
this.volTrancateCount = 0
}
if (this.volTrancateCount < this.volTrancateLength) {
break
} else {
console.log("silent...skip")
}
}
outputs[0][0].set(data)
if (voice) {
this.port.postMessage({ volume: this.volume });
outputs[0][0].set(voice)
}
return true;
}
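The reworked process() above pulls chunks from playBuffer in a loop: calcVol keeps a decayed RMS volume, readings below volTrancateThreshold increment volTrancateCount, and once volTrancateLength consecutive silent chunks have accumulated the chunk is dropped ("silent...skip") and the next one is tried. A condensed standalone sketch of that gating logic (function and state names here are illustrative, not from the commit):

// Decayed RMS, as in calcVol above.
const calcVol = (data: Float32Array, prevVol: number) => {
    const sum = data.reduce((prev, cur) => prev + cur * cur, 0)
    return Math.max(Math.sqrt(sum / data.length), prevVol * 0.95)
}

// Return the next chunk to play, skipping chunks once silence has persisted long enough.
const pickNextChunk = (
    playBuffer: Float32Array[],
    state: { volume: number; silentCount: number },
    volTrancateThreshold = 0.0005,
    volTrancateLength = 32
): Float32Array | undefined => {
    while (true) {
        const voice = playBuffer.shift()
        if (!voice) return undefined                            // buffer exhausted
        state.volume = calcVol(voice, state.volume)
        state.silentCount = state.volume < volTrancateThreshold ? state.silentCount + 1 : 0
        if (state.silentCount < volTrancateLength) return voice // still treated as audible: play it
        // long run of silence: drop this chunk and try the next one
    }
}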