add downsampling mode

This commit is contained in:
wataru 2023-02-14 22:32:25 +09:00
parent 9d84046a77
commit 852b4216ca
11 changed files with 734 additions and 92 deletions

View File

@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

View File

@ -10,6 +10,7 @@
"build:prod": "npm-run-all clean webpack:prod",
"build:dev": "npm-run-all clean webpack:dev",
"start": "webpack-dev-server --config webpack.dev.js",
"build:mod": "cd ../lib && npm run build:dev && cd - && cp -r ../lib/dist/* node_modules/@dannadori/voice-changer-client-js/dist/",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [

View File

@ -30,7 +30,9 @@ export const useMicrophoneOptions = () => {
useEffect(() => {
const createAudioContext = () => {
const ctx = new AudioContext()
const ctx = new AudioContext({
sampleRate: 48000,
})
setAudioContext(ctx)
document.removeEventListener('touchstart', createAudioContext);
document.removeEventListener('mousedown', createAudioContext);

View File

@ -1,4 +1,4 @@
import { BufferSize, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js";
@ -197,6 +197,26 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
}, [props.clientState.clientSetting.setting.voiceChangerMode, props.clientState.clientSetting.setVoiceChangerMode])
// Advanced-settings row: <select> bound to the client's downSamplingMode
// ("decimate" | "average"); selecting an option pushes the new mode back
// through clientSetting.setDownSamplingMode.
const downSamplingModeRow = useMemo(() => {
    const modeOptions = Object.values(DownSamplingMode).map((mode) => {
        return <option key={mode} value={mode}>{mode}</option>
    })
    return (
        <div className="body-row split-3-7 left-padding-1 guided">
            <div className="body-item-title left-padding-1 ">DownSamplingMode</div>
            <div className="body-select-container">
                <select className="body-select" value={props.clientState.clientSetting.setting.downSamplingMode} onChange={(e) => {
                    props.clientState.clientSetting.setDownSamplingMode(e.target.value as DownSamplingMode)
                }}>
                    {modeOptions}
                </select>
            </div>
        </div>
    )
}, [props.clientState.clientSetting.setting.downSamplingMode, props.clientState.clientSetting.setDownSamplingMode])
const workletSettingRow = useMemo(() => {
return (
@ -265,9 +285,10 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
<div className="body-row divider"></div>
{workletSettingRow}
<div className="body-row divider"></div>
{downSamplingModeRow}
</>
)
}, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow])
}, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
const advancedSetting = useMemo(() => {

View File

@ -1,7 +1,7 @@
import { io, Socket } from "socket.io-client";
import { DefaultEventsMap } from "@socket.io/component-emitter";
import { Duplex, DuplexOptions } from "readable-stream";
import { Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
import { DownSamplingMode, Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
export type Callbacks = {
onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
@ -19,6 +19,7 @@ export type AudioStreamerSettings = {
voiceChangerMode: VoiceChangerMode;
}
export class AudioStreamer extends Duplex {
private callbacks: Callbacks
private audioStreamerListeners: AudioStreamerListeners
@ -34,6 +35,11 @@ export class AudioStreamer extends Duplex {
// performance monitor
private bufferStart = 0;
// Flags
// private downSamplingMode: DownSamplingMode = DownSamplingMode.decimate
private downSamplingMode: DownSamplingMode = DownSamplingMode.average
constructor(callbacks: Callbacks, audioStreamerListeners: AudioStreamerListeners, options?: DuplexOptions) {
super(options);
this.callbacks = callbacks
@ -84,6 +90,11 @@ export class AudioStreamer extends Duplex {
this.voiceChangerMode = val
}
// set Flags
// Switch the strategy used to downsample 48kHz microphone input before
// transmission: DownSamplingMode.decimate (drop every other sample) or
// DownSamplingMode.average (box-filter averaging). Read by _write_realtime.
setDownSamplingMode = (val: DownSamplingMode) => {
this.downSamplingMode = val
}
getSettings = (): AudioStreamerSettings => {
return {
serverUrl: this.serverUrl,
@ -107,21 +118,63 @@ export class AudioStreamer extends Duplex {
callback();
}
_averageDownsampleBuffer(buffer: Float32Array, originalSampleRate: number, destinationSamplerate: number) {
if (originalSampleRate == destinationSamplerate) {
return buffer;
}
if (destinationSamplerate > originalSampleRate) {
throw "downsampling rate show be smaller than original sample rate";
}
const sampleRateRatio = originalSampleRate / destinationSamplerate;
const newLength = Math.round(buffer.length / sampleRateRatio);
const result = new Float32Array(newLength);
let offsetResult = 0;
let offsetBuffer = 0;
while (offsetResult < result.length) {
var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
// Use average value of skipped samples
var accum = 0, count = 0;
for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
accum += buffer[i];
count++;
}
result[offsetResult] = accum / count;
// Or you can simply get rid of the skipped samples:
// result[offsetResult] = buffer[nextOffsetBuffer];
offsetResult++;
offsetBuffer = nextOffsetBuffer;
}
return result;
}
private _write_realtime = (buffer: Float32Array) => {
let downsampledBuffer: Float32Array | null = null
if (this.downSamplingMode == DownSamplingMode.decimate) {
//////// (Kind 1) 間引き //////////
// bufferSize個のデータ48Khzが入ってくる。
//// 48000Hz で入ってくるので間引いて24000Hzに変換する。
//// バイトサイズは周波数変換で(x1/2), 16bit(2byte)で(x2)
const arrayBuffer = new ArrayBuffer((buffer.length / 2) * 2)
const dataView = new DataView(arrayBuffer);
downsampledBuffer = new Float32Array(buffer.length / 2);
for (let i = 0; i < buffer.length; i++) {
if (i % 2 == 0) {
let s = Math.max(-1, Math.min(1, buffer[i]));
downsampledBuffer[i / 2] = buffer[i]
}
}
} else {
//////// (Kind 2) 平均 //////////
downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
}
// Float to signed16
const arrayBuffer = new ArrayBuffer(downsampledBuffer.length * 2)
const dataView = new DataView(arrayBuffer);
for (let i = 0; i < downsampledBuffer.length; i++) {
let s = Math.max(-1, Math.min(1, downsampledBuffer[i]));
s = s < 0 ? s * 0x8000 : s * 0x7FFF
// 2分の1個目で2バイトずつ進むので((i/2)*2)
dataView.setInt16((i / 2) * 2, s, true);
}
dataView.setInt16(i * 2, s, true);
}
// 256byte(最低バッファサイズ256から間引いた個数x2byte)をchunkとして管理
const chunkByteSize = 256 // (const.ts ★1)
for (let i = 0; i < arrayBuffer.byteLength / chunkByteSize; i++) {
@ -129,6 +182,7 @@ export class AudioStreamer extends Duplex {
this.requestChunks.push(ab)
}
//// リクエストバッファの中身が、リクエスト送信数と違う場合は処理終了。
if (this.requestChunks.length < this.inputChunkNum) {
return
@ -198,15 +252,7 @@ export class AudioStreamer extends Duplex {
}
private sendBuffer = async (newBuffer: Uint8Array) => {
// if (this.serverUrl.length == 0) {
// // console.warn("no server url")
// // return
// // throw "no server url"
// }
const timestamp = Date.now()
// console.log("REQUEST_MESSAGE:", [this.gpu, this.srcId, this.dstId, timestamp, newBuffer.buffer])
// console.log("SERVER_URL", this.serverUrl, this.protocol)
// const convertChunkNum = this.voiceChangerMode === "realtime" ? this.requestParamas.convertChunkNum : 0
if (this.protocol === "sio") {
if (!this.socket) {
console.warn(`sio is not initialized`)
@ -214,26 +260,12 @@ export class AudioStreamer extends Duplex {
}
// console.log("emit!")
this.socket.emit('request_message', [
// this.requestParamas.gpu,
// this.requestParamas.srcId,
// this.requestParamas.dstId,
timestamp,
// convertChunkNum,
// this.requestParamas.crossFadeLowerValue,
// this.requestParamas.crossFadeOffsetRate,
// this.requestParamas.crossFadeEndRate,
newBuffer.buffer]);
} else {
const res = await postVoice(
this.serverUrl + "/test",
// this.requestParamas.gpu,
// this.requestParamas.srcId,
// this.requestParamas.dstId,
timestamp,
// convertChunkNum,
// this.requestParamas.crossFadeLowerValue,
// this.requestParamas.crossFadeOffsetRate,
// this.requestParamas.crossFadeEndRate,
newBuffer.buffer)
if (res.byteLength < 128 * 2) {
@ -248,24 +280,10 @@ export class AudioStreamer extends Duplex {
export const postVoice = async (
url: string,
// gpu: number,
// srcId: number,
// dstId: number,
timestamp: number,
// convertChunkNum: number,
// crossFadeLowerValue: number,
// crossFadeOffsetRate: number,
// crossFadeEndRate: number,
buffer: ArrayBuffer) => {
const obj = {
// gpu,
// srcId,
// dstId,
timestamp,
// convertChunkNum,
// crossFadeLowerValue,
// crossFadeOffsetRate,
// crossFadeEndRate,
buffer: Buffer.from(buffer).toString('base64')
};
const body = JSON.stringify(obj);
@ -283,7 +301,6 @@ export const postVoice = async (
const changedVoiceBase64 = receivedJson["changedVoiceBase64"]
const buf = Buffer.from(changedVoiceBase64, "base64")
const ab = new ArrayBuffer(buf.length);
// console.log("RECIV", buf.length)
const view = new Uint8Array(ab);
for (let i = 0; i < buf.length; ++i) {
view[i] = buf[i];

View File

@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream, validateUrl } from "./util";
import { BufferSize, DefaultVoiceChangerClientSetting, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
import { BufferSize, DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
import MicrophoneStream from "microphone-stream";
import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
import { ServerConfigurator } from "./ServerConfigurator";
@ -133,6 +133,9 @@ export class VoiceChangerClient {
this.currentMediaStream = await navigator.mediaDevices.getUserMedia({
audio: {
deviceId: input,
channelCount: 1,
sampleRate: 48000,
sampleSize: 16,
// echoCancellation: false,
// noiseSuppression: false
}
@ -228,6 +231,10 @@ export class VoiceChangerClient {
setVoiceChangerMode = (val: VoiceChangerMode) => {
this.audioStreamer.setVoiceChangerMode(val)
}
//// Audio Streamer Flag
// Thin delegation: forwards the chosen downsampling mode to the underlying
// AudioStreamer, which applies it when converting input audio for the server.
setDownSamplingMode = (val: DownSamplingMode) => {
this.audioStreamer.setDownSamplingMode(val)
}
// configure worklet
configureWorklet = (setting: WorkletSetting) => {

View File

@ -4,6 +4,7 @@
// 24000sample -> 1sec, 128sample(1chunk) -> 5.333msec
// 187.5chunk -> 1sec
// types
export type VoiceChangerServerSetting = {
convertChunkNum: number, // VITSに入力する変換サイズ。(入力データの2倍以上の大きさで指定。それより小さいものが指定された場合は、サーバ側で自動的に入力の2倍のサイズが設定される。)
@ -35,6 +36,7 @@ export type VoiceChangerClientSetting = {
correspondences: Correspondence[],
forceVfDisable: boolean,
voiceChangerMode: VoiceChangerMode,
downSamplingMode: DownSamplingMode,
inputGain: number
outputGain: number
@ -92,6 +94,12 @@ export const VoiceChangerMode = {
} as const
export type VoiceChangerMode = typeof VoiceChangerMode[keyof typeof VoiceChangerMode]
// Selectable strategies for downsampling microphone input (48kHz -> 24kHz)
// before it is sent to the server:
//   decimate — keep every other sample (fast, may alias)
//   average  — box-filter average of skipped samples (smoother)
export const DownSamplingMode = {
    decimate: "decimate",
    average: "average",
} as const
export type DownSamplingMode = typeof DownSamplingMode[keyof typeof DownSamplingMode]
export const SampleRate = {
"48000": 48000,
} as const
@ -186,6 +194,7 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
correspondences: [],
forceVfDisable: false,
voiceChangerMode: "realtime",
downSamplingMode: "average",
inputGain: 1.0,
outputGain: 1.0
}

View File

@ -1,5 +1,6 @@
import { useState, useMemo, useRef, useEffect } from "react"
import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence } from "../const"
import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence, DownSamplingMode } from "../const"
import { createDummyMediaStream } from "../util"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB"
@ -19,6 +20,7 @@ export type ClientSettingState = {
setVfForceDisabled: (vfForceDisabled: boolean) => Promise<void>
setInputChunkNum: (num: number) => void;
setVoiceChangerMode: (mode: VoiceChangerMode) => void
setDownSamplingMode: (mode: DownSamplingMode) => void
setSampleRate: (num: SampleRate) => void
setSpeakers: (speakers: Speaker[]) => void
setCorrespondences: (file: File | null) => Promise<void>
@ -165,6 +167,17 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
}
}, [props.voiceChangerClient])
// Memoized setter for the downsampling mode: pushes the new mode to the
// VoiceChangerClient, mirrors it into settingRef, then publishes a fresh
// setting object so React re-renders (and the IndexedDB persistence effect
// elsewhere in this hook picks it up).
// NOTE(review): silently no-ops until voiceChangerClient is created.
const setDownSamplingMode = useMemo(() => {
return (mode: DownSamplingMode) => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.setDownSamplingMode(mode)
settingRef.current.downSamplingMode = mode
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const setSampleRate = useMemo(() => {
return (num: SampleRate) => {
if (!props.voiceChangerClient) return
@ -261,6 +274,7 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
setVfForceDisabled,
setInputChunkNum,
setVoiceChangerMode,
setDownSamplingMode,
setSampleRate,
setSpeakers,
setCorrespondences,

View File

@ -401,7 +401,7 @@ class VoiceChanger():
def on_request(self, unpackedData: any):
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
# self.stream_in.write(unpackedData.astype(np.int16).tobytes())
self.stream_in.write(unpackedData.astype(np.int16).tobytes())
# print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024