add downsampling mode

2025-01-23 21:45:00 +03:00 · 2023-02-14 22:32:25 +09:00 · 2023-02-14 22:32:25 +09:00 · 852b4216ca
commit 852b4216ca
parent 9d84046a77
11 changed files with 734 additions and 92 deletions
--- a/client/demo/dist/index.html
+++ b/client/demo/dist/index.html
@ -1 +1,10 @@
-<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
+<!DOCTYPE html>
 <html style="width: 100%; height: 100%; overflow: hidden">
    <head>
        <meta charset="utf-8" />
        <title>Voice Changer Client Demo</title>
    <script defer src="index.js"></script></head>
    <body style="width: 100%; height: 100%; margin: 0px">
        <div id="app" style="width: 100%; height: 100%"></div>
    </body>
 </html>
--- a/client/demo/dist/index.js
+++ b/client/demo/dist/index.js
--- a/client/demo/dist/index.js.LICENSE.txt
+++ b/client/demo/dist/index.js.LICENSE.txt
@ -1,31 +0,0 @@
 /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
 /**
 * @license React
 * react-dom.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 /**
 * @license React
 * react.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 /**
 * @license React
 * scheduler.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
--- a/client/demo/package.json
+++ b/client/demo/package.json
@ -10,6 +10,7 @@
        "build:prod": "npm-run-all clean webpack:prod",
        "build:dev": "npm-run-all clean webpack:dev",
        "start": "webpack-dev-server  --config webpack.dev.js",
        "build:mod": "cd ../lib && npm run build:dev && cd - && cp -r ../lib/dist/* node_modules/@dannadori/voice-changer-client-js/dist/",
        "test": "echo \"Error: no test specified\" && exit 1"
    },
    "keywords": [
--- a/client/demo/src/100_options_microphone.tsx
+++ b/client/demo/src/100_options_microphone.tsx
@ -30,7 +30,9 @@ export const useMicrophoneOptions = () => {
    useEffect(() => {
        const createAudioContext = () => {
-            const ctx = new AudioContext()
+            const ctx = new AudioContext({
                sampleRate: 48000,
            })
            setAudioContext(ctx)
            document.removeEventListener('touchstart', createAudioContext);
            document.removeEventListener('mousedown', createAudioContext);
--- a/client/demo/src/105_advanced_setting.tsx
+++ b/client/demo/src/105_advanced_setting.tsx
@ -1,4 +1,4 @@
-import { BufferSize, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
+import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
 import React, { useMemo, useState } from "react"
 import { ClientState } from "@dannadori/voice-changer-client-js";
@ -197,6 +197,26 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
    }, [props.clientState.clientSetting.setting.voiceChangerMode, props.clientState.clientSetting.setVoiceChangerMode])
    // Advanced-setting row with a <select> listing every DownSamplingMode value.
    // The current value comes from the client setting; picking an entry forwards
    // the chosen value to clientSetting.setDownSamplingMode.
    const downSamplingModeRow = useMemo(() => {
        // One <option> per mode; the mode string is used as key, value and label.
        const modeOptions = Object.values(DownSamplingMode).map((mode) => (
            <option key={mode} value={mode}>{mode}</option>
        ))

        const handleModeChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
            props.clientState.clientSetting.setDownSamplingMode(event.target.value as DownSamplingMode)
        }

        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">DownSamplingMode</div>
                <div className="body-select-container">
                    <select
                        className="body-select"
                        value={props.clientState.clientSetting.setting.downSamplingMode}
                        onChange={handleModeChange}
                    >
                        {modeOptions}
                    </select>
                </div>
            </div>
        )
    }, [props.clientState.clientSetting.setting.downSamplingMode, props.clientState.clientSetting.setDownSamplingMode])
    const workletSettingRow = useMemo(() => {
        return (
@ -265,9 +285,10 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
                <div className="body-row divider"></div>
                {workletSettingRow}
                <div className="body-row divider"></div>
                {downSamplingModeRow}
            </>
        )
-    }, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow])
+    }, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
    const advancedSetting = useMemo(() => {
--- a/client/lib/src/AudioStreamer.ts
+++ b/client/lib/src/AudioStreamer.ts
@ -1,7 +1,7 @@
 import { io, Socket } from "socket.io-client";
 import { DefaultEventsMap } from "@socket.io/component-emitter";
 import { Duplex, DuplexOptions } from "readable-stream";
-import { Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
+import { DownSamplingMode, Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
 export type Callbacks = {
    onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
@ -19,6 +19,7 @@ export type AudioStreamerSettings = {
    voiceChangerMode: VoiceChangerMode;
 }
 export class AudioStreamer extends Duplex {
    private callbacks: Callbacks
    private audioStreamerListeners: AudioStreamerListeners
@ -34,6 +35,11 @@ export class AudioStreamer extends Duplex {
    // performance monitor
    private bufferStart = 0;
    // Flags 
    // private downSamplingMode: DownSamplingMode = DownSamplingMode.decimate
    private downSamplingMode: DownSamplingMode = DownSamplingMode.average
    constructor(callbacks: Callbacks, audioStreamerListeners: AudioStreamerListeners, options?: DuplexOptions) {
        super(options);
        this.callbacks = callbacks
@ -84,6 +90,11 @@ export class AudioStreamer extends Duplex {
        this.voiceChangerMode = val
    }
    // set Flags
    /**
     * Stores the down-sampling mode on this streamer.
     * `_write_realtime` reads the stored value to choose between decimation
     * and averaging.
     *
     * @param val - Mode to store.
     */
    setDownSamplingMode = (val: DownSamplingMode) => {
        this.downSamplingMode = val
    }
    getSettings = (): AudioStreamerSettings => {
        return {
            serverUrl: this.serverUrl,
@ -107,21 +118,63 @@ export class AudioStreamer extends Duplex {
        callback();
    }
    /**
     * Downsamples a sample buffer by averaging each run of input samples that
     * maps onto a single output sample.
     *
     * @param buffer - Input samples at `originalSampleRate`.
     * @param originalSampleRate - Sample rate of `buffer`.
     * @param destinationSamplerate - Target sample rate; must not exceed `originalSampleRate`.
     * @returns `buffer` itself (no copy) when both rates are equal, otherwise a new
     *          array of `Math.round(buffer.length / ratio)` averaged samples, where
     *          `ratio = originalSampleRate / destinationSamplerate`.
     * @throws RangeError when `destinationSamplerate` is greater than `originalSampleRate`.
     */
    _averageDownsampleBuffer(buffer: Float32Array, originalSampleRate: number, destinationSamplerate: number): Float32Array {
        if (originalSampleRate === destinationSamplerate) {
            return buffer;
        }
        if (destinationSamplerate > originalSampleRate) {
            // Throw a real Error (not a bare string) so callers get a stack trace.
            throw new RangeError("downsampling rate should be smaller than original sample rate");
        }
        const sampleRateRatio = originalSampleRate / destinationSamplerate;
        const newLength = Math.round(buffer.length / sampleRateRatio);
        const result = new Float32Array(newLength);
        // Start (inclusive) of the input window feeding the current output sample.
        let offsetBuffer = 0;
        for (let offsetResult = 0; offsetResult < newLength; offsetResult++) {
            // End (exclusive) of the input window; rounding keeps windows contiguous
            // for non-integer ratios.
            const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
            // Average the samples in the window, clamped to the end of the input.
            let accum = 0;
            let count = 0;
            for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
                accum += buffer[i];
                count++;
            }
            result[offsetResult] = accum / count;
            offsetBuffer = nextOffsetBuffer;
        }
        return result;
    }
    private _write_realtime = (buffer: Float32Array) => {
        let downsampledBuffer: Float32Array | null = null
        if (this.downSamplingMode == DownSamplingMode.decimate) {
            //////// (Kind 1) 間引き //////////
            // bufferSize個のデータ（48Khz）が入ってくる。
            //// 48000Hz で入ってくるので間引いて24000Hzに変換する。
-        //// バイトサイズは周波数変換で(x1/2), 16bit(2byte)で(x2)
+            downsampledBuffer = new Float32Array(buffer.length / 2);
        const arrayBuffer = new ArrayBuffer((buffer.length / 2) * 2)
        const dataView = new DataView(arrayBuffer);
            for (let i = 0; i < buffer.length; i++) {
                if (i % 2 == 0) {
-                let s = Math.max(-1, Math.min(1, buffer[i]));
+                    downsampledBuffer[i / 2] = buffer[i]
                }
            }
        } else {
            //////// (Kind 2) 平均 //////////
            downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
        }
        // Float to signed16
        const arrayBuffer = new ArrayBuffer(downsampledBuffer.length * 2)
        const dataView = new DataView(arrayBuffer);
        for (let i = 0; i < downsampledBuffer.length; i++) {
            let s = Math.max(-1, Math.min(1, downsampledBuffer[i]));
            s = s < 0 ? s * 0x8000 : s * 0x7FFF
-                // ２分の１個目で２バイトずつ進むので((i/2)*2)
+            dataView.setInt16(i * 2, s, true);
                dataView.setInt16((i / 2) * 2, s, true);
            }
        }
        // 256byte(最低バッファサイズ256から間引いた個数x2byte)をchunkとして管理
        const chunkByteSize = 256 // (const.ts ★1)
        for (let i = 0; i < arrayBuffer.byteLength / chunkByteSize; i++) {
@ -129,6 +182,7 @@ export class AudioStreamer extends Duplex {
            this.requestChunks.push(ab)
        }
        //// リクエストバッファの中身が、リクエスト送信数と違う場合は処理終了。
        if (this.requestChunks.length < this.inputChunkNum) {
            return
@ -198,15 +252,7 @@ export class AudioStreamer extends Duplex {
    }
    private sendBuffer = async (newBuffer: Uint8Array) => {
        // if (this.serverUrl.length == 0) {
        //     // console.warn("no server url")
        //     // return
        //     // throw "no server url"
        // }
        const timestamp = Date.now()
        // console.log("REQUEST_MESSAGE:", [this.gpu, this.srcId, this.dstId, timestamp, newBuffer.buffer])
        // console.log("SERVER_URL", this.serverUrl, this.protocol)
        // const convertChunkNum = this.voiceChangerMode === "realtime" ? this.requestParamas.convertChunkNum : 0
        if (this.protocol === "sio") {
            if (!this.socket) {
                console.warn(`sio is not initialized`)
@ -214,26 +260,12 @@ export class AudioStreamer extends Duplex {
            }
            // console.log("emit!")
            this.socket.emit('request_message', [
                // this.requestParamas.gpu,
                // this.requestParamas.srcId,
                // this.requestParamas.dstId,
                timestamp,
                // convertChunkNum,
                // this.requestParamas.crossFadeLowerValue,
                // this.requestParamas.crossFadeOffsetRate,
                // this.requestParamas.crossFadeEndRate,
                newBuffer.buffer]);
        } else {
            const res = await postVoice(
                this.serverUrl + "/test",
                // this.requestParamas.gpu,
                // this.requestParamas.srcId,
                // this.requestParamas.dstId,
                timestamp,
                // convertChunkNum,
                // this.requestParamas.crossFadeLowerValue,
                // this.requestParamas.crossFadeOffsetRate,
                // this.requestParamas.crossFadeEndRate,
                newBuffer.buffer)
            if (res.byteLength < 128 * 2) {
@ -248,24 +280,10 @@ export class AudioStreamer extends Duplex {
 export const postVoice = async (
    url: string,
    // gpu: number,
    // srcId: number,
    // dstId: number,
    timestamp: number,
    // convertChunkNum: number,
    // crossFadeLowerValue: number,
    // crossFadeOffsetRate: number,
    // crossFadeEndRate: number,
    buffer: ArrayBuffer) => {
    const obj = {
        // gpu,
        // srcId,
        // dstId,
        timestamp,
        // convertChunkNum,
        // crossFadeLowerValue,
        // crossFadeOffsetRate,
        // crossFadeEndRate,
        buffer: Buffer.from(buffer).toString('base64')
    };
    const body = JSON.stringify(obj);
@ -283,7 +301,6 @@ export const postVoice = async (
    const changedVoiceBase64 = receivedJson["changedVoiceBase64"]
    const buf = Buffer.from(changedVoiceBase64, "base64")
    const ab = new ArrayBuffer(buf.length);
    // console.log("RECIV", buf.length)
    const view = new Uint8Array(ab);
    for (let i = 0; i < buf.length; ++i) {
        view[i] = buf[i];
--- a/client/lib/src/VoiceChangerClient.ts
+++ b/client/lib/src/VoiceChangerClient.ts
@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
 import workerjs from "raw-loader!../worklet/dist/index.js";
 import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
 import { createDummyMediaStream, validateUrl } from "./util";
-import { BufferSize, DefaultVoiceChangerClientSetting, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
+import { BufferSize, DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
 import MicrophoneStream from "microphone-stream";
 import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
 import { ServerConfigurator } from "./ServerConfigurator";
@ -133,6 +133,9 @@ export class VoiceChangerClient {
            this.currentMediaStream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    deviceId: input,
                    channelCount: 1,
                    sampleRate: 48000,
                    sampleSize: 16,
                    // echoCancellation: false,
                    // noiseSuppression: false
                }
@ -228,6 +231,10 @@ export class VoiceChangerClient {
    /**
     * Forwards the voice changer mode to the underlying audio streamer.
     *
     * @param val - Mode passed through to `audioStreamer.setVoiceChangerMode`.
     */
    setVoiceChangerMode = (val: VoiceChangerMode) => {
        this.audioStreamer.setVoiceChangerMode(val)
    }
    ////  Audio Streamer Flag
    /**
     * Forwards the down-sampling mode to the underlying audio streamer.
     *
     * @param val - Mode passed through to `audioStreamer.setDownSamplingMode`.
     */
    setDownSamplingMode = (val: DownSamplingMode) => {
        this.audioStreamer.setDownSamplingMode(val)
    }
    // configure worklet
    configureWorklet = (setting: WorkletSetting) => {
--- a/client/lib/src/const.ts
+++ b/client/lib/src/const.ts
@ -4,6 +4,7 @@
 // 24000sample -> 1sec, 128sample(1chunk) -> 5.333msec
 // 187.5chunk -> 1sec
 // types
 export type VoiceChangerServerSetting = {
    convertChunkNum: number, // VITSに入力する変換サイズ。(入力データの2倍以上の大きさで指定。それより小さいものが指定された場合は、サーバ側で自動的に入力の2倍のサイズが設定される。)
@ -35,6 +36,7 @@ export type VoiceChangerClientSetting = {
    correspondences: Correspondence[],
    forceVfDisable: boolean,
    voiceChangerMode: VoiceChangerMode,
    downSamplingMode: DownSamplingMode,
    inputGain: number
    outputGain: number
@ -92,6 +94,12 @@ export const VoiceChangerMode = {
 } as const
 export type VoiceChangerMode = typeof VoiceChangerMode[keyof typeof VoiceChangerMode]
 /**
  * Available down-sampling strategies; each key maps to its own name as the value.
  */
 export const DownSamplingMode = {
     decimate: "decimate",
     average: "average",
 } as const
 /** Union of the string values of the `DownSamplingMode` object. */
 export type DownSamplingMode = (typeof DownSamplingMode)[keyof typeof DownSamplingMode]
 /** Sample-rate table; the only entry visible here is 48000, keyed by its string form. */
 export const SampleRate = {
    "48000": 48000,
 } as const
@ -186,6 +194,7 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
    correspondences: [],
    forceVfDisable: false,
    voiceChangerMode: "realtime",
    downSamplingMode: "average",
    inputGain: 1.0,
    outputGain: 1.0
 }
--- a/client/lib/src/hooks/useClientSetting.ts
+++ b/client/lib/src/hooks/useClientSetting.ts
@ -1,5 +1,6 @@
 import { useState, useMemo, useRef, useEffect } from "react"
-import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence } from "../const"
+
 import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence, DownSamplingMode } from "../const"
 import { createDummyMediaStream } from "../util"
 import { VoiceChangerClient } from "../VoiceChangerClient"
 import { useIndexedDB } from "./useIndexedDB"
@ -19,6 +20,7 @@ export type ClientSettingState = {
    setVfForceDisabled: (vfForceDisabled: boolean) => Promise<void>
    setInputChunkNum: (num: number) => void;
    setVoiceChangerMode: (mode: VoiceChangerMode) => void
    setDownSamplingMode: (mode: DownSamplingMode) => void
    setSampleRate: (num: SampleRate) => void
    setSpeakers: (speakers: Speaker[]) => void
    setCorrespondences: (file: File | null) => Promise<void>
@ -165,6 +167,17 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
        }
    }, [props.voiceChangerClient])
    // Memoized setter for the down-sampling mode. It does nothing until a voice
    // changer client exists. Otherwise it pushes the mode to the client, records
    // it on settingRef, and publishes a fresh copy of the setting via setSetting.
    const setDownSamplingMode = useMemo(
        () => (mode: DownSamplingMode) => {
            const client = props.voiceChangerClient
            if (client) {
                client.setDownSamplingMode(mode)
                settingRef.current.downSamplingMode = mode
                // Spread into a new object so React sees a changed reference.
                setSetting({ ...settingRef.current })
            }
        },
        [props.voiceChangerClient]
    )
    const setSampleRate = useMemo(() => {
        return (num: SampleRate) => {
            if (!props.voiceChangerClient) return
@ -261,6 +274,7 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
        setVfForceDisabled,
        setInputChunkNum,
        setVoiceChangerMode,
        setDownSamplingMode,
        setSampleRate,
        setSpeakers,
        setCorrespondences,
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@ -401,7 +401,7 @@ class VoiceChanger():
    def on_request(self, unpackedData: any):
        convertSize = self.settings.convertChunkNum * 128  # 128sample/1chunk
-        # self.stream_in.write(unpackedData.astype(np.int16).tobytes())
+        self.stream_in.write(unpackedData.astype(np.int16).tobytes())
        # print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
        if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
            convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024