mirror of https://github.com/w-okada/voice-changer.git
synced 2025-01-23 13:35:12 +03:00

add downsampling mode

This commit is contained in:
parent 9d84046a77
commit 852b4216ca
client/demo/dist/index.html (vendored): 11 lines changed
@@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
    <meta charset="utf-8" />
    <title>Voice Changer Client Demo</title>
    <script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
    <div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>
client/demo/dist/index.js (vendored): 597 lines changed
File diff suppressed because one or more lines are too long
client/demo/dist/index.js.LICENSE.txt (vendored): 31 lines changed
@@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */

/**
 * @license React
 * react-dom.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

/**
 * @license React
 * react.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

/**
 * @license React
 * scheduler.production.min.js
 *
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
@@ -10,6 +10,7 @@
    "build:prod": "npm-run-all clean webpack:prod",
    "build:dev": "npm-run-all clean webpack:dev",
    "start": "webpack-dev-server --config webpack.dev.js",
    "build:mod": "cd ../lib && npm run build:dev && cd - && cp -r ../lib/dist/* node_modules/@dannadori/voice-changer-client-js/dist/",
    "test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [
@@ -30,7 +30,9 @@ export const useMicrophoneOptions = () => {

    useEffect(() => {
        const createAudioContext = () => {
            const ctx = new AudioContext()
            const ctx = new AudioContext({
                sampleRate: 48000,
            })
            setAudioContext(ctx)
            document.removeEventListener('touchstart', createAudioContext);
            document.removeEventListener('mousedown', createAudioContext);
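The hunk above pins the demo's AudioContext to 48 kHz and creates it only after a user gesture. A minimal standalone sketch of that pattern, assuming a browser environment; only the 48000 value and the gesture listeners come from the commit:

// Sketch: create a 48 kHz AudioContext only after a user gesture
// (autoplay policies require a gesture before audio can start).
const createAudioContext = () => {
    // Requesting sampleRate: 48000 keeps the context aligned with the
    // 48 kHz microphone constraint used elsewhere in the client.
    const ctx = new AudioContext({ sampleRate: 48000 });
    // Once created, stop listening for further gestures.
    document.removeEventListener("touchstart", createAudioContext);
    document.removeEventListener("mousedown", createAudioContext);
    return ctx;
};
document.addEventListener("touchstart", createAudioContext);
document.addEventListener("mousedown", createAudioContext);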
@@ -1,4 +1,4 @@
import { BufferSize, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js";

@@ -197,6 +197,26 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
    }, [props.clientState.clientSetting.setting.voiceChangerMode, props.clientState.clientSetting.setVoiceChangerMode])


    const downSamplingModeRow = useMemo(() => {
        return (
            <div className="body-row split-3-7 left-padding-1 guided">
                <div className="body-item-title left-padding-1 ">DownSamplingMode</div>
                <div className="body-select-container">
                    <select className="body-select" value={props.clientState.clientSetting.setting.downSamplingMode} onChange={(e) => {
                        props.clientState.clientSetting.setDownSamplingMode(e.target.value as DownSamplingMode)
                    }}>
                        {
                            Object.values(DownSamplingMode).map(x => {
                                return <option key={x} value={x}>{x}</option>
                            })
                        }
                    </select>
                </div>
            </div>
        )
    }, [props.clientState.clientSetting.setting.downSamplingMode, props.clientState.clientSetting.setDownSamplingMode])



    const workletSettingRow = useMemo(() => {
        return (
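The select above is populated from Object.values(DownSamplingMode), and the chosen string is cast back to the DownSamplingMode union. A small generic sketch of that const-object-driven select, with illustrative component and prop names that are not part of the commit:

import React from "react";

// Illustrative only: a <select> whose options come from a string-literal
// "const object" such as DownSamplingMode ({ decimate: "decimate", average: "average" }).
type EnumSelectProps<T extends string> = {
    values: readonly T[];
    value: T;
    onChange: (v: T) => void;
};

const EnumSelect = <T extends string>({ values, value, onChange }: EnumSelectProps<T>) => (
    <select value={value} onChange={(e) => onChange(e.target.value as T)}>
        {values.map((x) => (
            <option key={x} value={x}>{x}</option>
        ))}
    </select>
);

// Hypothetical usage:
// <EnumSelect values={Object.values(DownSamplingMode)} value={mode} onChange={setMode} />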
@@ -265,9 +285,10 @@ export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSett
                <div className="body-row divider"></div>
                {workletSettingRow}
                <div className="body-row divider"></div>
                {downSamplingModeRow}
            </>
        )
    }, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow])
    }, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])


    const advancedSetting = useMemo(() => {
@@ -1,7 +1,7 @@
import { io, Socket } from "socket.io-client";
import { DefaultEventsMap } from "@socket.io/component-emitter";
import { Duplex, DuplexOptions } from "readable-stream";
import { Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
import { DownSamplingMode, Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";

export type Callbacks = {
    onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
@@ -19,6 +19,7 @@ export type AudioStreamerSettings = {
    voiceChangerMode: VoiceChangerMode;
}


export class AudioStreamer extends Duplex {
    private callbacks: Callbacks
    private audioStreamerListeners: AudioStreamerListeners
@@ -34,6 +35,11 @@ export class AudioStreamer extends Duplex {
    // performance monitor
    private bufferStart = 0;

    // Flags
    // private downSamplingMode: DownSamplingMode = DownSamplingMode.decimate
    private downSamplingMode: DownSamplingMode = DownSamplingMode.average

    constructor(callbacks: Callbacks, audioStreamerListeners: AudioStreamerListeners, options?: DuplexOptions) {
        super(options);
        this.callbacks = callbacks
@@ -84,6 +90,11 @@ export class AudioStreamer extends Duplex {
        this.voiceChangerMode = val
    }

    // set Flags
    setDownSamplingMode = (val: DownSamplingMode) => {
        this.downSamplingMode = val
    }

    getSettings = (): AudioStreamerSettings => {
        return {
            serverUrl: this.serverUrl,
@@ -107,21 +118,63 @@ export class AudioStreamer extends Duplex {
        callback();
    }

    _averageDownsampleBuffer(buffer: Float32Array, originalSampleRate: number, destinationSamplerate: number) {
        if (originalSampleRate == destinationSamplerate) {
            return buffer;
        }
        if (destinationSamplerate > originalSampleRate) {
            throw "downsampling rate should be smaller than original sample rate";
        }
        const sampleRateRatio = originalSampleRate / destinationSamplerate;
        const newLength = Math.round(buffer.length / sampleRateRatio);
        const result = new Float32Array(newLength);
        let offsetResult = 0;
        let offsetBuffer = 0;
        while (offsetResult < result.length) {
            var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
            // Use average value of skipped samples
            var accum = 0, count = 0;
            for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
                accum += buffer[i];
                count++;
            }
            result[offsetResult] = accum / count;
            // Or you can simply get rid of the skipped samples:
            // result[offsetResult] = buffer[nextOffsetBuffer];
            offsetResult++;
            offsetBuffer = nextOffsetBuffer;
        }
        return result;
    }

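A quick numeric check of the averaging helper above, assuming a 2:1 ratio (48000 to 24000) and made-up sample values:

// Illustrative check of averaging downsampling (2:1 ratio): each output
// sample is the mean of the input samples it spans.
const input = new Float32Array([0.1, 0.3, 0.5, 0.7]); // pretend 48 kHz data
// With a ratio of 48000 / 24000 = 2, the expected output is:
//   [(0.1 + 0.3) / 2, (0.5 + 0.7) / 2] = [0.2, 0.6]
const ratio = 48000 / 24000;
const out = new Float32Array(Math.round(input.length / ratio));
let offsetBuffer = 0;
for (let offsetResult = 0; offsetResult < out.length; offsetResult++) {
    const next = Math.round((offsetResult + 1) * ratio);
    let accum = 0, count = 0;
    for (let i = offsetBuffer; i < next && i < input.length; i++) {
        accum += input[i];
        count++;
    }
    out[offsetResult] = accum / count;
    offsetBuffer = next;
}
console.log(Array.from(out)); // [0.2, 0.6] (up to float rounding)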
    private _write_realtime = (buffer: Float32Array) => {
        let downsampledBuffer: Float32Array | null = null
        if (this.downSamplingMode == DownSamplingMode.decimate) {
            //////// (Kind 1) Decimation //////////
            // bufferSize samples (48 kHz) come in per call.
            //// Input arrives at 48000 Hz, so decimate it down to 24000 Hz.
            //// Byte size: x1/2 from the rate conversion, x2 for 16-bit (2-byte) samples.
            const arrayBuffer = new ArrayBuffer((buffer.length / 2) * 2)
            const dataView = new DataView(arrayBuffer);

            downsampledBuffer = new Float32Array(buffer.length / 2);
            for (let i = 0; i < buffer.length; i++) {
                if (i % 2 == 0) {
                    let s = Math.max(-1, Math.min(1, buffer[i]));
                    downsampledBuffer[i / 2] = buffer[i]
                }
            }
        } else {
            //////// (Kind 2) Averaging //////////
            downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
        }

        // Float to signed16
        const arrayBuffer = new ArrayBuffer(downsampledBuffer.length * 2)
        const dataView = new DataView(arrayBuffer);
        for (let i = 0; i < downsampledBuffer.length; i++) {
            let s = Math.max(-1, Math.min(1, downsampledBuffer[i]));
            s = s < 0 ? s * 0x8000 : s * 0x7FFF
            // Every other sample advances 2 bytes, hence ((i/2)*2)
            dataView.setInt16((i / 2) * 2, s, true);
        }
            dataView.setInt16(i * 2, s, true);
        }


        // Manage the data in 256-byte chunks (samples decimated from the minimum buffer size of 256, x 2 bytes each)
        const chunkByteSize = 256 // (const.ts ★1)
        for (let i = 0; i < arrayBuffer.byteLength / chunkByteSize; i++) {
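Decimation keeps every other sample (cheap, but it can alias), while the average mode smooths groups of samples before the rate drop. After either path, the realtime write converts Float32 samples to little-endian signed 16-bit PCM; a minimal standalone sketch of that conversion, with an illustrative helper name not taken from the commit:

// Sketch: convert Float32 samples in [-1, 1] to little-endian signed 16-bit PCM,
// as the realtime path above does after downsampling.
const floatTo16BitPCM = (samples: Float32Array): ArrayBuffer => {
    const out = new ArrayBuffer(samples.length * 2); // 2 bytes per sample
    const view = new DataView(out);
    for (let i = 0; i < samples.length; i++) {
        const s = Math.max(-1, Math.min(1, samples[i])); // clamp
        const v = s < 0 ? s * 0x8000 : s * 0x7fff;       // scale to int16 range
        view.setInt16(i * 2, v, true);                   // true = little-endian
    }
    return out;
};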
@@ -129,6 +182,7 @@ export class AudioStreamer extends Duplex {
            this.requestChunks.push(ab)
        }

        //// If the request buffer does not yet hold the number of chunks to send per request, stop here.
        if (this.requestChunks.length < this.inputChunkNum) {
            return
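The loop above slices the converted PCM into 256-byte chunks and sends nothing until inputChunkNum chunks have accumulated. A rough sketch of that buffering idea, assuming illustrative names and a caller-supplied send function:

// Sketch: accumulate fixed-size chunks and flush only when enough have been gathered.
const CHUNK_BYTES = 256;

const splitIntoChunks = (data: ArrayBuffer): ArrayBuffer[] => {
    const chunks: ArrayBuffer[] = [];
    for (let offset = 0; offset + CHUNK_BYTES <= data.byteLength; offset += CHUNK_BYTES) {
        chunks.push(data.slice(offset, offset + CHUNK_BYTES));
    }
    return chunks;
};

const pending: ArrayBuffer[] = [];
const pushAudio = (data: ArrayBuffer, inputChunkNum: number, send: (chunks: ArrayBuffer[]) => void) => {
    pending.push(...splitIntoChunks(data));
    if (pending.length < inputChunkNum) return;  // not enough buffered yet
    send(pending.splice(0, pending.length));     // flush everything collected so far
};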
@@ -198,15 +252,7 @@ export class AudioStreamer extends Duplex {
    }

    private sendBuffer = async (newBuffer: Uint8Array) => {
        // if (this.serverUrl.length == 0) {
        //     // console.warn("no server url")
        //     // return
        //     // throw "no server url"
        // }
        const timestamp = Date.now()
        // console.log("REQUEST_MESSAGE:", [this.gpu, this.srcId, this.dstId, timestamp, newBuffer.buffer])
        // console.log("SERVER_URL", this.serverUrl, this.protocol)
        // const convertChunkNum = this.voiceChangerMode === "realtime" ? this.requestParamas.convertChunkNum : 0
        if (this.protocol === "sio") {
            if (!this.socket) {
                console.warn(`sio is not initialized`)
@@ -214,26 +260,12 @@ export class AudioStreamer extends Duplex {
            }
            // console.log("emit!")
            this.socket.emit('request_message', [
                // this.requestParamas.gpu,
                // this.requestParamas.srcId,
                // this.requestParamas.dstId,
                timestamp,
                // convertChunkNum,
                // this.requestParamas.crossFadeLowerValue,
                // this.requestParamas.crossFadeOffsetRate,
                // this.requestParamas.crossFadeEndRate,
                newBuffer.buffer]);
        } else {
            const res = await postVoice(
                this.serverUrl + "/test",
                // this.requestParamas.gpu,
                // this.requestParamas.srcId,
                // this.requestParamas.dstId,
                timestamp,
                // convertChunkNum,
                // this.requestParamas.crossFadeLowerValue,
                // this.requestParamas.crossFadeOffsetRate,
                // this.requestParamas.crossFadeEndRate,
                newBuffer.buffer)

            if (res.byteLength < 128 * 2) {
@@ -248,24 +280,10 @@ export class AudioStreamer extends Duplex {

export const postVoice = async (
    url: string,
    // gpu: number,
    // srcId: number,
    // dstId: number,
    timestamp: number,
    // convertChunkNum: number,
    // crossFadeLowerValue: number,
    // crossFadeOffsetRate: number,
    // crossFadeEndRate: number,
    buffer: ArrayBuffer) => {
    const obj = {
        // gpu,
        // srcId,
        // dstId,
        timestamp,
        // convertChunkNum,
        // crossFadeLowerValue,
        // crossFadeOffsetRate,
        // crossFadeEndRate,
        buffer: Buffer.from(buffer).toString('base64')
    };
    const body = JSON.stringify(obj);
@@ -283,7 +301,6 @@ export const postVoice = async (
    const changedVoiceBase64 = receivedJson["changedVoiceBase64"]
    const buf = Buffer.from(changedVoiceBase64, "base64")
    const ab = new ArrayBuffer(buf.length);
    // console.log("RECIV", buf.length)
    const view = new Uint8Array(ab);
    for (let i = 0; i < buf.length; ++i) {
        view[i] = buf[i];

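Taken together, the REST branch base64-encodes the PCM into JSON, posts it to serverUrl + "/test", and decodes changedVoiceBase64 from the response. A sketch of that round trip, assuming postVoice uses fetch and that Buffer is available through the bundler polyfill; the fetch options are not shown in the hunks and are an assumption:

// Sketch of the HTTP round trip used when protocol !== "sio": raw PCM is
// base64-encoded into JSON, and the changed voice comes back base64-encoded.
// Error handling is omitted; fetch usage is assumed, not taken from the diff.
const sendPcmOverRest = async (serverUrl: string, pcm: ArrayBuffer): Promise<ArrayBuffer> => {
    const body = JSON.stringify({
        timestamp: Date.now(),
        buffer: Buffer.from(pcm).toString("base64"),
    });
    const res = await fetch(serverUrl + "/test", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body,
    });
    const json = await res.json();
    const buf = Buffer.from(json["changedVoiceBase64"], "base64");
    // Copy into a fresh ArrayBuffer so downstream code owns plain bytes.
    const ab = new ArrayBuffer(buf.length);
    new Uint8Array(ab).set(buf);
    return ab;
};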
@@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream, validateUrl } from "./util";
import { BufferSize, DefaultVoiceChangerClientSetting, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
import { BufferSize, DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
import MicrophoneStream from "microphone-stream";
import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
import { ServerConfigurator } from "./ServerConfigurator";
@@ -133,6 +133,9 @@ export class VoiceChangerClient {
        this.currentMediaStream = await navigator.mediaDevices.getUserMedia({
            audio: {
                deviceId: input,
                channelCount: 1,
                sampleRate: 48000,
                sampleSize: 16,
                // echoCancellation: false,
                // noiseSuppression: false
            }
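The constraint object above asks for a mono, 48 kHz, 16-bit microphone track. A minimal sketch of the same request; note that sampleRate and sampleSize are hints the browser may not honor, so checking the granted track settings is a reasonable follow-up:

// Sketch: request a mono 48 kHz microphone stream with the constraints used above.
const openMic = async (deviceId: string): Promise<MediaStream> => {
    const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
            deviceId,
            channelCount: 1,
            sampleRate: 48000,
            sampleSize: 16,
        },
    });
    // Inspect what the browser actually granted (constraints are best-effort).
    console.log(stream.getAudioTracks()[0].getSettings());
    return stream;
};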
@@ -228,6 +231,10 @@ export class VoiceChangerClient {
    setVoiceChangerMode = (val: VoiceChangerMode) => {
        this.audioStreamer.setVoiceChangerMode(val)
    }
    //// Audio Streamer Flag
    setDownSamplingMode = (val: DownSamplingMode) => {
        this.audioStreamer.setDownSamplingMode(val)
    }

    // configure worklet
    configureWorklet = (setting: WorkletSetting) => {

@@ -4,6 +4,7 @@
// 24000sample -> 1sec, 128sample(1chunk) -> 5.333msec
// 187.5chunk -> 1sec


// types
export type VoiceChangerServerSetting = {
    convertChunkNum: number, // Conversion size fed to VITS. (Specify at least twice the input size; if a smaller value is given, the server automatically uses twice the input size.)
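The timing comment follows directly from the 24 kHz stream rate; a quick arithmetic check:

// Check of the comment above: at 24000 samples/sec,
// one 128-sample chunk lasts 128 / 24000 s, roughly 5.333 ms,
// and one second holds 24000 / 128 = 187.5 chunks.
const sampleRate = 24000;
const chunkSamples = 128;
console.log((chunkSamples / sampleRate) * 1000); // ~5.333 ms per chunk
console.log(sampleRate / chunkSamples);          // 187.5 chunks per second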
@@ -35,6 +36,7 @@ export type VoiceChangerClientSetting = {
    correspondences: Correspondence[],
    forceVfDisable: boolean,
    voiceChangerMode: VoiceChangerMode,
    downSamplingMode: DownSamplingMode,

    inputGain: number
    outputGain: number
@@ -92,6 +94,12 @@ export const VoiceChangerMode = {
} as const
export type VoiceChangerMode = typeof VoiceChangerMode[keyof typeof VoiceChangerMode]

export const DownSamplingMode = {
    "decimate": "decimate",
    "average": "average"
} as const
export type DownSamplingMode = typeof DownSamplingMode[keyof typeof DownSamplingMode]

export const SampleRate = {
    "48000": 48000,
} as const
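DownSamplingMode follows the same as-const object pattern as VoiceChangerMode: the object provides runtime values for the UI, while the derived type narrows to the literal union "decimate" | "average". A small sketch of how that type behaves; the commented-out third value is purely hypothetical:

// The derived type is the union of the object's values: "decimate" | "average".
export const DownSamplingMode = {
    "decimate": "decimate",
    "average": "average",
} as const;
export type DownSamplingMode = typeof DownSamplingMode[keyof typeof DownSamplingMode];

const a: DownSamplingMode = DownSamplingMode.average; // ok
const b: DownSamplingMode = "decimate";               // ok, literal is part of the union
// const c: DownSamplingMode = "linear";              // compile error: not in the union (hypothetical value)
Object.values(DownSamplingMode);                      // ["decimate", "average"] at runtime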
@@ -186,6 +194,7 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
    correspondences: [],
    forceVfDisable: false,
    voiceChangerMode: "realtime",
    downSamplingMode: "average",
    inputGain: 1.0,
    outputGain: 1.0
}

@@ -1,5 +1,6 @@
import { useState, useMemo, useRef, useEffect } from "react"
import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence } from "../const"

import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence, DownSamplingMode } from "../const"
import { createDummyMediaStream } from "../util"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB"
@@ -19,6 +20,7 @@ export type ClientSettingState = {
    setVfForceDisabled: (vfForceDisabled: boolean) => Promise<void>
    setInputChunkNum: (num: number) => void;
    setVoiceChangerMode: (mode: VoiceChangerMode) => void
    setDownSamplingMode: (mode: DownSamplingMode) => void
    setSampleRate: (num: SampleRate) => void
    setSpeakers: (speakers: Speaker[]) => void
    setCorrespondences: (file: File | null) => Promise<void>
@@ -165,6 +167,17 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
        }
    }, [props.voiceChangerClient])

    const setDownSamplingMode = useMemo(() => {
        return (mode: DownSamplingMode) => {
            if (!props.voiceChangerClient) return
            props.voiceChangerClient.setDownSamplingMode(mode)
            settingRef.current.downSamplingMode = mode
            setSetting({ ...settingRef.current })
        }
    }, [props.voiceChangerClient])



    const setSampleRate = useMemo(() => {
        return (num: SampleRate) => {
            if (!props.voiceChangerClient) return
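From the demo side, the new setter is consumed like the other client-setting setters. A hypothetical usage sketch; the component name and button UI are illustrative and not part of the commit:

import React from "react";
import { ClientState } from "@dannadori/voice-changer-client-js";

// Hypothetical component toggling the downsampling mode exposed by useClientSetting.
const DownSamplingToggle = (props: { clientState: ClientState }) => {
    const { setting, setDownSamplingMode } = props.clientState.clientSetting;
    return (
        <button onClick={() => {
            // Flip between the two modes added in this commit.
            const next = setting.downSamplingMode === "average" ? "decimate" : "average";
            setDownSamplingMode(next);
        }}>
            downsampling: {setting.downSamplingMode}
        </button>
    );
};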
@@ -261,6 +274,7 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
        setVfForceDisabled,
        setInputChunkNum,
        setVoiceChangerMode,
        setDownSamplingMode,
        setSampleRate,
        setSpeakers,
        setCorrespondences,

@@ -401,7 +401,7 @@ class VoiceChanger():

    def on_request(self, unpackedData: any):
        convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
        # self.stream_in.write(unpackedData.astype(np.int16).tobytes())
        self.stream_in.write(unpackedData.astype(np.int16).tobytes())
        # print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
        if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
            convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
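On the server, convertSize starts at convertChunkNum * 128 samples and grows when the input plus the cross-fade overlap and a 1024-sample margin would not fit. A TypeScript rendering of the same arithmetic for illustration, with made-up example numbers:

// Illustrative mirror of the server-side convertSize logic (names follow the Python above).
const computeConvertSize = (convertChunkNum: number, inputSamples: number, crossFadeOverlapRate: number): number => {
    let convertSize = convertChunkNum * 128; // 128 samples per chunk
    if (inputSamples * (1 + crossFadeOverlapRate) + 1024 > convertSize) {
        convertSize = Math.floor(inputSamples * (1 + crossFadeOverlapRate)) + 1024;
    }
    return convertSize;
};

// Example with made-up numbers: 32 chunks give 4096 samples, but 4096 input samples
// with a 0.1 overlap need 4096 * 1.1 + 1024 = 5529.6, so the size is enlarged.
console.log(computeConvertSize(32, 4096, 0.1)); // 5529 = floor(4096 * 1.1) + 1024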