WIP:Internal version

This commit is contained in:
w-okada 2023-11-04 01:41:51 +09:00
parent 4f534a2c44
commit e62a140698
10 changed files with 3332 additions and 30 deletions

View File

@ -1,10 +1 @@
<!DOCTYPE html> <!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -5,6 +5,8 @@ import { useVCClient } from "../001_globalHooks/001_useVCClient";
import { useAppRoot } from "./001_AppRootProvider"; import { useAppRoot } from "./001_AppRootProvider";
import { useMessageBuilder } from "../hooks/useMessageBuilder"; import { useMessageBuilder } from "../hooks/useMessageBuilder";
import { VoiceChangerJSClient } from "./VoiceChangerJSClient";
type Props = { type Props = {
children: ReactNode; children: ReactNode;
}; };
@ -27,6 +29,7 @@ export const AppStateProvider = ({ children }: Props) => {
const appRoot = useAppRoot(); const appRoot = useAppRoot();
const clientState = useVCClient({ audioContext: appRoot.audioContextState.audioContext }); const clientState = useVCClient({ audioContext: appRoot.audioContextState.audioContext });
const messageBuilderState = useMessageBuilder(); const messageBuilderState = useMessageBuilder();
const voiceChangerJSClient = useRef<VoiceChangerJSClient>();
useEffect(() => { useEffect(() => {
messageBuilderState.setMessage(__filename, "ioError", { messageBuilderState.setMessage(__filename, "ioError", {
@ -53,15 +56,29 @@ export const AppStateProvider = ({ children }: Props) => {
} }
}, [clientState.clientState.ioErrorCount]); }, [clientState.clientState.ioErrorCount]);
useEffect(() => { // useEffect(() => {
if (clientState.clientState.initialized) { // if (clientState.clientState.initialized) {
clientState.clientState.setInternalAudioProcessCallback({ // voiceChangerJSClient.current = new VoiceChangerJSClient();
processAudio: (data: Uint8Array) => { // voiceChangerJSClient.current.initialize();
return data; // clientState.clientState.setInternalAudioProcessCallback({
}, // processAudio: async (data: Uint8Array) => {
}); // console.log("[CLIENTJS] start --------------------------------------");
} // const audioF32 = new Float32Array(data.buffer);
}, [clientState.clientState.initialized]); // const converted = await voiceChangerJSClient.current!.convert(audioF32);
// let audio_int16_out = new Int16Array(converted.length);
// for (let i = 0; i < converted.length; i++) {
// audio_int16_out[i] = converted[i] * 32768.0;
// }
// const res = new Uint8Array(audio_int16_out.buffer);
// console.log("AUDIO::::audio_int16_out", audio_int16_out);
// console.log("[CLIENTJS] end --------------------------------------");
// return res;
// },
// });
// }
// }, [clientState.clientState.initialized]);
const providerValue: AppStateValue = { const providerValue: AppStateValue = {
audioContext: appRoot.audioContextState.audioContext!, audioContext: appRoot.audioContextState.audioContext!,

View File

@ -0,0 +1,149 @@
import { create, ConverterType } from "@alexanderolsen/libsamplerate-js";
import { BlockingQueue } from "./_BlockingQueue";
import { WorkerManager, generateConfig, VoiceChangerProcessorInitializeParams, VoiceChangerProcessorConvertParams, FunctionType, VoiceChangerProcessorResult } from "@dannadori/voice-changer-js";
/**
 * Client that runs voice conversion through a `WorkerManager` worker.
 *
 * Incoming audio is resampled to the model input rate, accumulated in an
 * internal buffer, cut into fixed-length overlapping chunks, converted chunk
 * by chunk in the worker, concatenated and finally resampled to the output
 * rate. Calls to `convert` are serialized with a single-token queue.
 */
export class VoiceChangerJSClient {
    private wm = new WorkerManager();
    /** Audio at `modelInputSamplingRate` that has not been consumed by a conversion yet. */
    private audioBuffer: Float32Array = new Float32Array(0);
    /** Length of one chunk sent to the worker, counted in samples of `audioBuffer`. */
    private audioInputLength = 24000;
    private inputSamplingRate = 48000;
    private outputSamplingRate = 48000;
    private modelInputSamplingRate = 16000;
    private modelOutputSamplingRate = 40000;
    /** Holds exactly one token while idle; taking the token is what serializes `convert`. */
    private sem = new BlockingQueue<number>();
    private crossfadeChunks = 1;
    private solaChunks = 0.5;

    constructor() {
        // Seed the queue with the single token so the first `lock()` resolves immediately.
        this.sem.enqueue(0);
    }

    /** Waits for the token and returns the counter value it carries. */
    private lock = async () => {
        const num = await this.sem.dequeue();
        return num;
    };

    /** Puts the token back (with the counter incremented) so the next waiter can proceed. */
    private unlock = (num: number) => {
        this.sem.enqueue(num + 1);
    };

    /**
     * Initializes the worker manager and asks the worker to load its models.
     * Resolves once the worker has answered the initialize request.
     */
    initialize = async () => {
        console.log("Voice Changer Initializing,,,");
        const baseUrl = "http://127.0.0.1:18888";

        this.wm = new WorkerManager();
        const config = generateConfig();
        config.processorURL = `${baseUrl}/process.js`;
        config.onnxWasmPaths = `${baseUrl}/`;
        await this.wm.init(config);

        const initializeParams: VoiceChangerProcessorInitializeParams = {
            type: FunctionType.initialize,
            // Same value `convert` uses to cut chunks, so the two cannot drift apart.
            inputLength: this.audioInputLength,
            f0_min: 50,
            f0_max: 1100,
            embPitchUrl: `${baseUrl}/models/emb_pit_24000.bin`,
            rvcv2InputLength: 148,
            // rvcv2Url: "http://127.0.0.1:18888/models/rvc2v_24000.bin",
            rvcv2Url: `${baseUrl}/models/rvc2vnof0_24000.bin`,
            transfer: [],
        };

        const res = (await this.wm.execute(initializeParams)) as VoiceChangerProcessorResult;
        console.log("Voice Changer Initialized..", res);
    };

    /**
     * Converts a block of input audio.
     *
     * @param audio Samples at `inputSamplingRate`.
     * @returns Converted samples resampled to `outputSamplingRate`, or a
     *          one-element array while not enough audio has been buffered.
     */
    convert = async (audio: Float32Array): Promise<Float32Array> => {
        console.log("convert start....", audio);
        const lockNum = await this.lock();
        try {
            return await this.convertLocked(audio);
        } finally {
            // Always hand the token back: if resampling or the worker throws
            // and the token is lost, every later convert() would wait forever.
            this.unlock(lockNum);
        }
    };

    /** Body of `convert`; must only run while the caller holds the token. */
    private convertLocked = async (audio: Float32Array): Promise<Float32Array> => {
        //resample
        const audio_16k = await this.resample(audio, this.inputSamplingRate, this.modelInputSamplingRate);

        //// store: append the new samples to whatever was left over last time
        const newAudioBuffer = new Float32Array(this.audioBuffer.length + audio_16k.length);
        newAudioBuffer.set(this.audioBuffer);
        newAudioBuffer.set(audio_16k, this.audioBuffer.length);
        this.audioBuffer = newAudioBuffer;

        //// Buffering.....
        if (this.audioBuffer.length < this.audioInputLength * 1) {
            console.log(`skip covert length:${this.audioBuffer.length}, audio_16k:${audio_16k.length}`);
            return new Float32Array(1);
        }
        console.log(`--------------- convert start... length:${this.audioBuffer.length}, audio_16k:${audio_16k.length}`);

        //// get chunks
        // Consecutive chunks overlap by (crossfadeChunks + solaChunks) * 320 samples.
        // NOTE(review): 320 is presumably one 20 ms frame at 16 kHz - confirm.
        const overlap = (this.crossfadeChunks + this.solaChunks) * 320;
        const audioChunks: Float32Array[] = [];
        let chunkIndex = 0;
        while (true) {
            const chunkOffset = chunkIndex * this.audioInputLength - overlap * chunkIndex;
            const chunkEnd = chunkOffset + this.audioInputLength;
            if (chunkEnd > this.audioBuffer.length) {
                // Keep the unconsumed tail (including the overlap) for the next call.
                this.audioBuffer = this.audioBuffer.slice(chunkOffset);
                break;
            }
            audioChunks.push(this.audioBuffer.slice(chunkOffset, chunkEnd));
            chunkIndex++;
        }

        // Defensive guard: the buffering check above already ensures the first chunk fits.
        if (audioChunks.length == 0) {
            console.log(`skip covert length:${this.audioBuffer.length}, audio_16k:${audio_16k.length}`);
            return new Float32Array(1);
        }

        //convert (each)
        const convertedChunks: Float32Array[] = [];
        for (let i = 0; i < audioChunks.length; i++) {
            const convertParams: VoiceChangerProcessorConvertParams = {
                type: FunctionType.convert,
                transfer: [audioChunks[i].buffer],
            };
            const res = (await this.wm.execute(convertParams)) as VoiceChangerProcessorResult;
            const converted = new Float32Array(res.transfer[0] as ArrayBuffer);
            console.log(`converted.length:::${i}:${converted.length}`);
            convertedChunks.push(converted);
        }

        //concat
        const totalLength = convertedChunks.reduce((prev, cur) => prev + cur.length, 0);
        const convetedAudio = new Float32Array(totalLength);
        let offset = 0;
        for (const chunk of convertedChunks) {
            convetedAudio.set(chunk, offset);
            offset += chunk.length;
        }
        console.log(`converted.length:::convetedAudio:${convetedAudio.length}`);

        //resample
        // The values below are diagnostics only; the actual resampling uses the nominal rates.
        const outputDuration = (this.audioInputLength * audioChunks.length - this.crossfadeChunks * 320) / 16000;
        const outputSamples = outputDuration * this.outputSamplingRate;
        const convertedOutputRatio = outputSamples / convetedAudio.length;
        const realOutputSamplingRate = this.modelOutputSamplingRate * convertedOutputRatio;
        console.log(`realOutputSamplingRate:${realOutputSamplingRate}, `, this.modelOutputSamplingRate, convertedOutputRatio);

        const response2 = await this.resample(convetedAudio, this.modelOutputSamplingRate, this.outputSamplingRate);

        console.log(`converted from :${audioChunks.length * this.audioInputLength} to:${convetedAudio.length} to:${response2.length}`);
        console.log(`outputDuration :${outputDuration} outputSamples:${outputSamples}, convertedOutputRatio:${convertedOutputRatio}, realOutputSamplingRate:${realOutputSamplingRate}`);
        return response2;
    };

    // Utility
    /**
     * Resamples mono audio from `srcSampleRate` to `dstSampleRate`.
     *
     * A converter is created for the call and destroyed afterwards so its
     * resources are not leaked when this is called repeatedly.
     */
    resample = async (data: Float32Array, srcSampleRate: number, dstSampleRate: number) => {
        const converterType = ConverterType.SRC_SINC_BEST_QUALITY;
        const nChannels = 1;
        const converter = await create(nChannels, srcSampleRate, dstSampleRate, {
            converterType: converterType, // default SRC_SINC_FASTEST. see API for more
        });
        try {
            return converter.simple(data);
        } finally {
            // Release the converter even if simple() throws.
            converter.destroy();
        }
    };
}

View File

@ -0,0 +1,41 @@
/**
 * Asynchronous FIFO queue.
 *
 * `dequeue()` always returns a promise: it is already settled when a value
 * has been enqueued ahead of it, otherwise it settles when the matching
 * `enqueue()` happens later. Values are delivered in enqueue order.
 */
export class BlockingQueue<T> {
    /** Promises for values that have not been handed out by `dequeue()` yet. */
    private _promises: Promise<T>[] = [];
    /** Resolve callbacks for promises that have not received a value yet. */
    private _resolvers: ((t: T) => void)[] = [];

    /** Creates one promise/resolver pair and appends each half to its list. */
    private _openSlot(): void {
        const slot = new Promise<T>((settle) => {
            this._resolvers.push(settle);
        });
        this._promises.push(slot);
    }

    /** Delivers `t` to the oldest waiting consumer, or stores it for a future one. */
    enqueue(t: T) {
        if (this._resolvers.length === 0) {
            this._openSlot();
        }
        const settle = this._resolvers.shift()!;
        settle(t);
    }

    /** Returns a promise for the next value in FIFO order. */
    dequeue() {
        if (this._promises.length === 0) {
            this._openSlot();
        }
        return this._promises.shift()!;
    }

    /** True when no enqueued value is waiting to be dequeued. */
    isEmpty() {
        return this._promises.length === 0;
    }

    /** True when at least one `dequeue()` is still waiting for a value. */
    isBlocked() {
        return this._resolvers.length !== 0;
    }

    /** Stored values minus waiting consumers; negative while consumers are waiting. */
    get length() {
        return this._promises.length - this._resolvers.length;
    }
}

View File

@ -64,8 +64,11 @@ module.exports = {
new CopyPlugin({ new CopyPlugin({
patterns: [{ from: "public/models/emb_pit_24000.bin", to: "models/emb_pit_24000.bin" }], patterns: [{ from: "public/models/emb_pit_24000.bin", to: "models/emb_pit_24000.bin" }],
}), }),
new CopyPlugin({ // new CopyPlugin({
patterns: [{ from: "public/models/rvc2v_24000.bin", to: "models/rvc2v_24000.bin" }], // patterns: [{ from: "public/models/rvc2v_24000.bin", to: "models/rvc2v_24000.bin" }],
}), // }),
// new CopyPlugin({
// patterns: [{ from: "public/models/rvc2vnof0_24000.bin", to: "models/rvc2vnof0_24000.bin" }],
// }),
], ],
}; };

View File

@ -296,7 +296,7 @@ export class VoiceChangerWorkletNode extends AudioWorkletNode {
return; return;
} }
const res = await this.internalCallback.processAudio(newBuffer); const res = await this.internalCallback.processAudio(newBuffer);
if (res.length == 0) { if (res.length < 128 * 2) {
return; return;
} }
if (this.outputNode != null) { if (this.outputNode != null) {

View File

@ -493,7 +493,8 @@ export type ClientSetting = {
}; };
export const DefaultClientSettng: ClientSetting = { export const DefaultClientSettng: ClientSetting = {
workletSetting: { workletSetting: {
numTrancateTreshold: 512 * 2, // numTrancateTreshold: 512 * 2,
numTrancateTreshold: 100,
volTrancateThreshold: 0.0005, volTrancateThreshold: 0.0005,
volTrancateLength: 32, volTrancateLength: 32,
}, },

View File

@ -34,7 +34,7 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
private BLOCK_SIZE = 128; private BLOCK_SIZE = 128;
private initialized = false; private initialized = false;
private volume = 0; private volume = 0;
private numTrancateTreshold = 100; // private numTrancateTreshold = 100;
// private volTrancateThreshold = 0.0005 // private volTrancateThreshold = 0.0005
// private volTrancateLength = 32 // private volTrancateLength = 32
// private volTrancateCount = 0 // private volTrancateCount = 0
@ -69,7 +69,7 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
handleMessage(event: any) { handleMessage(event: any) {
const request = event.data as VoiceChangerWorkletProcessorRequest; const request = event.data as VoiceChangerWorkletProcessorRequest;
if (request.requestType === "config") { if (request.requestType === "config") {
this.numTrancateTreshold = request.numTrancateTreshold; // this.numTrancateTreshold = request.numTrancateTreshold;
// this.volTrancateLength = request.volTrancateLength // this.volTrancateLength = request.volTrancateLength
// this.volTrancateThreshold = request.volTrancateThreshold // this.volTrancateThreshold = request.volTrancateThreshold
console.log("[worklet] worklet configured", request); console.log("[worklet] worklet configured", request);
@ -101,12 +101,16 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
return; return;
} }
if (this.playBuffer.length > this.numTrancateTreshold) { const f32Data = request.voice;
console.log(`[worklet] Truncate ${this.playBuffer.length} > ${this.numTrancateTreshold}`); // if (this.playBuffer.length > this.numTrancateTreshold) {
// console.log(`[worklet] Truncate ${this.playBuffer.length} > ${this.numTrancateTreshold}`);
// this.trancateBuffer();
// }
if (this.playBuffer.length > f32Data.length / this.BLOCK_SIZE) {
console.log(`[worklet] Truncate ${this.playBuffer.length} > ${f32Data.length / this.BLOCK_SIZE}`);
this.trancateBuffer(); this.trancateBuffer();
} }
const f32Data = request.voice;
const concatedF32Data = new Float32Array(this.unpushedF32Data.length + f32Data.length); const concatedF32Data = new Float32Array(this.unpushedF32Data.length + f32Data.length);
concatedF32Data.set(this.unpushedF32Data); concatedF32Data.set(this.unpushedF32Data);
concatedF32Data.set(f32Data, this.unpushedF32Data.length); concatedF32Data.set(f32Data, this.unpushedF32Data.length);