Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-02-09 03:37:51 +03:00)
server resampling added

Commit 15686caf50 (parent c6309b83f8)
client/demo/dist/index.js (vendored, 4 lines changed)
File diff suppressed because one or more lines are too long
@@ -1,6 +1,5 @@
-import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
+import { BufferSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
-import React, { useMemo, useState } from "react"
+import React, { useMemo } from "react"
-import { ClientState } from "@dannadori/voice-changer-client-js";
 import { useAppState } from "./001_provider/001_AppStateProvider";
 import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
@@ -83,6 +82,27 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
         )
     }, [appState.clientSetting.setting.sampleRate, appState.clientSetting.setSampleRate])

+    const sendingSampleRateRow = useMemo(() => {
+        return (
+            <div className="body-row split-3-7 left-padding-1 guided">
+                <div className="body-item-title left-padding-1">Sending Sample Rate</div>
+                <div className="body-select-container">
+                    <select className="body-select" value={appState.clientSetting.setting.sendingSampleRate} onChange={(e) => {
+                        appState.clientSetting.setSendingSampleRate(Number(e.target.value) as InputSampleRate)
+                        appState.serverSetting.setInputSampleRate(Number(e.target.value) as InputSampleRate)
+
+                    }}>
+                        {
+                            Object.values(InputSampleRate).map(x => {
+                                return <option key={x} value={x}>{x}</option>
+                            })
+                        }
+                    </select>
+                </div>
+            </div>
+        )
+    }, [appState.clientSetting.setting.sendingSampleRate, appState.clientSetting.setSendingSampleRate, appState.serverSetting.setInputSampleRate])
+
     const bufferSizeRow = useMemo(() => {
         return (
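The new row drives two settings at once: the client's sendingSampleRate (the rate AudioStreamer emits) and the server's inputSampleRate (the rate the server expects to receive). The two must agree, or the server will resample from the wrong rate. A hypothetical helper that keeps them in lock-step (the name applySendingSampleRate is an illustration, not part of this commit):

    // Sketch only: funnel both updates through one function so the client's
    // sending rate and the server's expected input rate can never diverge.
    const applySendingSampleRate = (rate: InputSampleRate) => {
        appState.clientSetting.setSendingSampleRate(rate)   // client: AudioStreamer output rate
        appState.serverSetting.setInputSampleRate(rate)     // server: expected inbound rate
    }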
@@ -263,6 +283,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
                 {protocolRow}
                 <div className="body-row divider"></div>
                 {sampleRateRow}
+                {sendingSampleRateRow}
                 {bufferSizeRow}
                 <div className="body-row divider"></div>
@@ -280,7 +301,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {

             </>
         )
-    }, [mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
+    }, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])


     const advancedSetting = useMemo(() => {
client/lib/package-lock.json (generated, 1693 lines changed)
File diff suppressed because it is too large
@@ -27,7 +27,7 @@
     "license": "ISC",
     "devDependencies": {
         "@types/audioworklet": "^0.0.36",
-        "@types/node": "^18.13.0",
+        "@types/node": "^18.14.0",
         "@types/react": "18.0.28",
         "@types/react-dom": "18.0.11",
         "eslint": "^8.34.0",
@@ -47,9 +47,11 @@
     },
     "dependencies": {
         "@types/readable-stream": "^2.3.15",
-        "amazon-chime-sdk-js": "^3.10.0",
+        "amazon-chime-sdk-js": "^3.11.0",
+        "install": "^0.13.0",
         "localforage": "^1.10.0",
         "microphone-stream": "^6.0.1",
+        "path-browserify": "^1.0.1",
         "react": "^18.2.0",
         "react-dom": "^18.2.0",
         "readable-stream": "^4.3.0",
@@ -1,7 +1,8 @@
 import { io, Socket } from "socket.io-client";
 import { DefaultEventsMap } from "@socket.io/component-emitter";
 import { Duplex, DuplexOptions } from "readable-stream";
-import { DownSamplingMode, Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
+import { DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, SendingSampleRate, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";


 export type Callbacks = {
     onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
@@ -38,7 +39,7 @@ export class AudioStreamer extends Duplex {
     // Flags
     // private downSamplingMode: DownSamplingMode = DownSamplingMode.decimate
     private downSamplingMode: DownSamplingMode = DownSamplingMode.average
+    private sendingSampleRate: number = DefaultVoiceChangerClientSetting.sendingSampleRate

     constructor(callbacks: Callbacks, audioStreamerListeners: AudioStreamerListeners, options?: DuplexOptions) {
         super(options);
@@ -97,6 +98,9 @@ export class AudioStreamer extends Duplex {
     setDownSamplingMode = (val: DownSamplingMode) => {
         this.downSamplingMode = val
     }
+    setSendingSampleRate = (val: SendingSampleRate) => {
+        this.sendingSampleRate = val
+    }

     getSettings = (): AudioStreamerSettings => {
         return {
@@ -156,9 +160,12 @@ export class AudioStreamer extends Duplex {
     }


-    private _write_realtime = (buffer: Float32Array) => {
+    private _write_realtime = async (buffer: Float32Array) => {

         let downsampledBuffer: Float32Array | null = null
-        if (this.downSamplingMode == DownSamplingMode.decimate) {
+        if (this.sendingSampleRate == 48000) {
+            downsampledBuffer = buffer
+        } else if (this.downSamplingMode == DownSamplingMode.decimate) {
             //////// (Kind 1) decimation //////////
             // bufferSize samples of data (48kHz) come in.
             //// Input arrives at 48000Hz, so decimate it down to 24000Hz.
@@ -170,7 +177,8 @@ export class AudioStreamer extends Duplex {
             }
         } else {
             //////// (Kind 2) averaging //////////
-            downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
+            // downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
+            downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, this.sendingSampleRate)
         }

         // Float to signed16
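_averageDownsampleBuffer itself is not shown in this diff; the change is only that its target rate is now this.sendingSampleRate instead of a hard-coded 24000 (and the new fast path above skips downsampling entirely when sending at 48000Hz). For orientation, a minimal sketch of average-based downsampling, inferred from the method's name and call site rather than taken from this commit:

    // Sketch: downsample by averaging each group of source samples that maps
    // onto one destination sample (assumes srcRate >= dstRate).
    const averageDownsample = (buffer: Float32Array, srcRate: number, dstRate: number): Float32Array => {
        const ratio = srcRate / dstRate
        const dstLength = Math.floor(buffer.length / ratio)
        const result = new Float32Array(dstLength)
        for (let i = 0; i < dstLength; i++) {
            const start = Math.floor(i * ratio)
            const end = Math.floor((i + 1) * ratio)
            let sum = 0
            for (let j = start; j < end; j++) {
                sum += buffer[j]
            }
            result[i] = sum / (end - start)
        }
        return result
    }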
@@ -184,7 +192,9 @@ export class AudioStreamer extends Duplex {


         // Treat 256 bytes as one chunk (the sample count decimated from the minimum buffer size of 256, times 2 bytes each)
-        const chunkByteSize = 256 // (const.ts ★1)
+        // const chunkByteSize = 256 // (const.ts ★1)
+        // const chunkByteSize = 256 * 2 // (const.ts ★1)
+        const chunkByteSize = (256 * 2) * (this.sendingSampleRate / 48000) // (const.ts ★1)
         for (let i = 0; i < arrayBuffer.byteLength / chunkByteSize; i++) {
             const ab = arrayBuffer.slice(i * chunkByteSize, (i + 1) * chunkByteSize)
             this.requestChunks.push(ab)
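The chunk size now scales with the sending rate so that each chunk covers the same wall-clock duration regardless of rate: at 48000Hz a chunk is 512 bytes (256 Int16 samples), at 24000Hz it is 256 bytes (128 samples), both about 5.3ms of audio. A quick check of the arithmetic (standalone sketch, not code from this commit):

    // chunkByteSize = (256 * 2) * (rate / 48000): 2 bytes per Int16 sample,
    // scaled so a chunk always spans 256 samples' worth of 48kHz time.
    const chunkByteSize = (sendingSampleRate: number): number =>
        (256 * 2) * (sendingSampleRate / 48000)

    const chunkDurationMs = (sendingSampleRate: number): number =>
        (chunkByteSize(sendingSampleRate) / 2) / sendingSampleRate * 1000

    console.log(chunkByteSize(48000), chunkDurationMs(48000)) // 512 bytes, ~5.33 ms
    console.log(chunkByteSize(24000), chunkDurationMs(24000)) // 256 bytes, ~5.33 ms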
@@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
 import workerjs from "raw-loader!../worklet/dist/index.js";
 import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
 import { createDummyMediaStream, validateUrl } from "./util";
-import { BufferSize, DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
+import { BufferSize, DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, SendingSampleRate, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
 import MicrophoneStream from "microphone-stream";
 import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
 import { ServerConfigurator } from "./ServerConfigurator";
@@ -255,6 +255,9 @@ export class VoiceChangerClient {
     setDownSamplingMode = (val: DownSamplingMode) => {
         this.audioStreamer.setDownSamplingMode(val)
     }
+    setSendingSampleRate = (val: SendingSampleRate) => {
+        this.audioStreamer.setSendingSampleRate(val)
+    }

     // configure worklet
     configureWorklet = (setting: WorkletSetting) => {
@@ -26,6 +26,7 @@ export type VoiceChangerServerSetting = {
     f0Detector: string // dio or harvest
     recordIO: number // 0:off, 1:on
     serverMicProps: string
+    inputSampleRate: InputSampleRate
 }

 export type VoiceChangerClientSetting = {
@@ -33,6 +34,7 @@ export type VoiceChangerClientSetting = {
     mmvcServerUrl: string,
     protocol: Protocol,
     sampleRate: SampleRate, // 48000Hz
+    sendingSampleRate: SendingSampleRate,
     bufferSize: BufferSize, // 256, 512, 1024, 2048, 4096, 8192, 16384 (for mic stream)
     inputChunkNum: number, // n of (256 x n) for send buffer
     speakers: Speaker[],
@@ -83,6 +85,7 @@ export type ServerInfo = {
     f0Detector: string
     recordIO: number
     serverMicProps: string
+    inputSampleRate: InputSampleRate
 }

 export type ServerAudioDevice = {
@@ -123,6 +126,18 @@ export const SampleRate = {
 } as const
 export type SampleRate = typeof SampleRate[keyof typeof SampleRate]

+export const SendingSampleRate = {
+    "48000": 48000,
+    "24000": 24000
+} as const
+export type SendingSampleRate = typeof SendingSampleRate[keyof typeof SendingSampleRate]
+
+export const InputSampleRate = {
+    "48000": 48000,
+    "24000": 24000
+} as const
+export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]
+
 export const BufferSize = {
     "256": 256,
     "512": 512,
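Both new types follow the codebase's existing enum pattern: a frozen object plus a union type derived from its values. Object.values(InputSampleRate) then enumerates the legal rates for the UI select, and raw DOM values have to be cast back into the union, which is why the demo writes Number(e.target.value) as InputSampleRate. A small standalone illustration of how the pattern behaves (sketch, not code from this commit):

    // The object supplies runtime values; the derived type is the union 48000 | 24000.
    const SendingSampleRate = {
        "48000": 48000,
        "24000": 24000,
    } as const
    type SendingSampleRate = typeof SendingSampleRate[keyof typeof SendingSampleRate]

    const rates = Object.values(SendingSampleRate)        // (48000 | 24000)[]
    const ok: SendingSampleRate = 24000                   // compiles
    // const bad: SendingSampleRate = 44100               // rejected at compile time
    const fromUi = Number("24000") as SendingSampleRate   // DOM values arrive as strings, hence the cast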
@@ -169,6 +184,7 @@ export const ServerSettingKey = {
     "f0Detector": "f0Detector",
     "recordIO": "recordIO",
     "serverMicProps": "serverMicProps",
+    "inputSampleRate": "inputSampleRate",
 } as const
 export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]

@@ -188,8 +204,8 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
     onnxExecutionProvider: "CPUExecutionProvider",
     f0Detector: "dio",
     recordIO: 0,
-    serverMicProps: ""
+    serverMicProps: "",
+    inputSampleRate: 48000
 }

 export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
@@ -197,6 +213,7 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
     mmvcServerUrl: "",
     protocol: "sio",
     sampleRate: 48000,
+    sendingSampleRate: 48000,
     bufferSize: 1024,
     inputChunkNum: 48,
     speakers: [
@@ -1,6 +1,6 @@
 import { useState, useMemo, useRef, useEffect } from "react"

-import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence, DownSamplingMode } from "../const"
+import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence, DownSamplingMode, SendingSampleRate } from "../const"
 import { VoiceChangerClient } from "../VoiceChangerClient"
 import { useIndexedDB } from "./useIndexedDB"

@@ -22,6 +22,7 @@ export type ClientSettingState = {
     setInputChunkNum: (num: number) => void;
     setVoiceChangerMode: (mode: VoiceChangerMode) => void
     setDownSamplingMode: (mode: DownSamplingMode) => void
+    setSendingSampleRate: (val: SendingSampleRate) => void
     setSampleRate: (num: SampleRate) => void
     setSpeakers: (speakers: Speaker[]) => void
     setCorrespondences: (file: File | null) => Promise<void>
@@ -191,6 +192,15 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
         }
     }, [props.voiceChangerClient])

+    const setSendingSampleRate = useMemo(() => {
+        return (val: SendingSampleRate) => {
+            if (!props.voiceChangerClient) return
+            props.voiceChangerClient.setSendingSampleRate(val)
+            settingRef.current.sendingSampleRate = val
+            setSetting({ ...settingRef.current })
+        }
+    }, [props.voiceChangerClient])
+

     const setSampleRate = useMemo(() => {
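The setter follows the hook's established pattern: push the value into the live VoiceChangerClient, mutate the ref that holds the canonical setting object, then copy the ref into React state so subscribers re-render. A condensed sketch of that ref-plus-state idiom, reusing the library's exported default and type (names shortened for illustration; not code from this commit):

    import { useRef, useState } from "react"

    // Keep the authoritative settings in a ref (synchronous, stable identity),
    // and mirror them into state purely to trigger re-renders.
    const useSettings = () => {
        const settingRef = useRef({ ...DefaultVoiceChangerClientSetting })
        const [setting, setSetting] = useState(settingRef.current)
        const update = (patch: Partial<VoiceChangerClientSetting>) => {
            Object.assign(settingRef.current, patch)   // write-through to the canonical copy
            setSetting({ ...settingRef.current })      // fresh object so React sees a change
        }
        return { setting, update }
    }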
@@ -292,6 +302,7 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
         setInputChunkNum,
         setVoiceChangerMode,
         setDownSamplingMode,
+        setSendingSampleRate,
         setSampleRate,
         setSpeakers,
         setCorrespondences,
@@ -1,5 +1,5 @@
 import { useState, useMemo, useRef, useEffect } from "react"
-import { VoiceChangerServerSetting, ServerInfo, Framework, OnnxExecutionProvider, DefaultVoiceChangerServerSetting, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ServerAudioDevices } from "../const"
+import { VoiceChangerServerSetting, ServerInfo, Framework, OnnxExecutionProvider, DefaultVoiceChangerServerSetting, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ServerAudioDevices, InputSampleRate } from "../const"
 import { VoiceChangerClient } from "../VoiceChangerClient"
 import { useIndexedDB } from "./useIndexedDB"

@@ -52,6 +52,7 @@ export type ServerSettingState = {
     setF0Detector: (val: string) => Promise<boolean>;
     setRecordIO: (num: number) => Promise<boolean>;
     setServerMicrophone: (index: number) => Promise<boolean | undefined>
+    setInputSampleRate: (num: InputSampleRate) => Promise<boolean>
     reloadServerInfo: () => Promise<void>;
     setFileUploadSetting: (val: FileUploadSetting) => void
     loadModel: () => Promise<void>
@@ -233,6 +234,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
             return await _set_and_store(ServerSettingKey.serverMicProps, JSON.stringify(serverMicProps))
         }
     }, [props.voiceChangerClient])

+    const setInputSampleRate = useMemo(() => {
+        return async (num: number) => {
+            return await _set_and_store(ServerSettingKey.inputSampleRate, "" + num)
+        }
+    }, [props.voiceChangerClient])
     //////////////
     // Operations
     /////////////
@@ -413,6 +420,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
         setF0Detector,
         setRecordIO,
         setServerMicrophone,
+        setInputSampleRate,
         reloadServerInfo,
         setFileUploadSetting,
         loadModel,
@@ -4,6 +4,8 @@ module.exports = {
     entry: "./src/index.ts",
     resolve: {
         extensions: [".ts", ".js"],
+        fallback: {
+        }
     },
     module: {
         rules: [
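The empty resolve.fallback block is the webpack 5 switch for Node-core polyfills: since v5, webpack no longer shims modules like path for the browser, and fallback is where replacements (or explicit false opt-outs) are declared. Given that this commit also adds path-browserify to the dependencies, a typical wiring might look like the following sketch (an assumption about intent; the committed block is empty):

    // Sketch: map Node's "path" onto the browser polyfill added in package.json
    // (webpack also accepts `path: false` to stub the module out entirely).
    resolve: {
        extensions: [".ts", ".js"],
        fallback: {
            path: require.resolve("path-browserify"),
        },
    },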
@@ -30,7 +30,6 @@ class MMVC_Namespace(socketio.AsyncNamespace):
         else:
             unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
             audio1 = self.voiceChangerManager.changeVoice(unpackedData)
-            # print("sio result:", len(audio1), audio1.shape)
             bin = struct.pack('<%sh' % len(audio1), *audio1)
             await self.emit('response', [timestamp, bin], to=sid)

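On the wire, audio travels as little-endian signed 16-bit PCM: the server's struct.unpack('<%sh', ...) decodes exactly what the client's "Float to signed16" step in AudioStreamer produces. A TypeScript sketch of that encoding (illustration only, not this commit's code):

    // Convert Float32 samples in [-1, 1] to little-endian Int16 bytes,
    // the format struct.unpack('<%sh', ...) expects on the server.
    const floatToInt16Bytes = (samples: Float32Array): ArrayBuffer => {
        const view = new DataView(new ArrayBuffer(samples.length * 2))
        samples.forEach((s, i) => {
            const clamped = Math.max(-1, Math.min(1, s))
            view.setInt16(i * 2, clamped * 0x7fff, true) // true = little-endian
        })
        return view.buffer
    }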
@@ -4,6 +4,7 @@ import os
 import traceback
 import numpy as np
 from dataclasses import dataclass, asdict
+import resampy

 import onnxruntime

@@ -98,13 +99,14 @@ class VocieChangerSettings():
     f0Detector: str = "dio"  # dio or harvest
     recordIO: int = 1  # 0:off, 1:on
     serverMicProps: str = ""
+    inputSampleRate: int = 48000  # 48000 or 24000

     pyTorchModelFile: str = ""
     onnxModelFile: str = ""
     configFile: str = ""

     # ↓ enumerate only the mutable fields
-    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO"]
+    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate"]
     floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
     strData = ["framework", "f0Detector", "serverMicProps"]

@@ -512,6 +514,9 @@ class VoiceChanger():
         return result

     def on_request(self, unpackedData: any):
+        if self.settings.inputSampleRate != 24000:
+            print("convert sampling rate!", self.settings.inputSampleRate)
+            unpackedData = resampy.resample(unpackedData, 48000, 24000)
         convertSize = self.settings.convertChunkNum * 128  # 128sample/1chunk
         # print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
         if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
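This is the server half of the feature: when the client sends at anything other than 24000Hz, the server resamples down to the 24000Hz the model expects. Note that the branch hard-codes 48000 as the source rate, so it effectively assumes the only non-24000 input is 48000Hz, which matches the two values InputSampleRate allows. resampy.resample performs high-quality band-limited resampling; as a much cruder conceptual stand-in, a linear-interpolation resampler looks like this (TypeScript sketch, illustration only):

    // Crude linear-interpolation resampler: for each output sample, interpolate
    // between the two nearest input samples. resampy instead uses band-limited
    // sinc interpolation, which avoids the aliasing this naive version admits.
    const resampleLinear = (input: Float32Array, srcRate: number, dstRate: number): Float32Array => {
        const outLength = Math.floor(input.length * dstRate / srcRate)
        const output = new Float32Array(outLength)
        for (let i = 0; i < outLength; i++) {
            const pos = i * srcRate / dstRate
            const i0 = Math.floor(pos)
            const i1 = Math.min(i0 + 1, input.length - 1)
            const frac = pos - i0
            output[i] = input[i0] * (1 - frac) + input[i1] * frac
        }
        return output
    }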
@@ -547,123 +552,3 @@ class VoiceChanger():
         self.stream_in.write(unpackedData.astype(np.int16).tobytes())
         self.stream_out.write(result.tobytes())
         return result
-
-
-#########################################################################################
-
-    def overlap_merge(self, now_wav, prev_wav, overlap_length):
-        """
-        Merges the generated wav data into the previously generated wav data,
-        cross-fading over overlap_length samples.
-        The trailing overlap_length samples are dropped here because they are
-        merged into the next call's output before playback.
-
-        Parameters
-        ----------
-        now_wav: wav audio data generated this time
-        prev_wav: wav audio data generated last time
-        overlap_length: length of the overlap
-        """
-        if overlap_length == 0:
-            return now_wav
-        gradation = np.arange(overlap_length) / overlap_length
-        now = np.frombuffer(now_wav, dtype='int16')
-        prev = np.frombuffer(prev_wav, dtype='int16')
-        now_head = now[:overlap_length]
-        prev_tail = prev[-overlap_length:]
-        print("merge params:", gradation.shape, now.shape, prev.shape, now_head.shape, prev_tail.shape)
-        merged = prev_tail * (np.cos(gradation * np.pi * 0.5) ** 2) + now_head * (np.cos((1 - gradation) * np.pi * 0.5) ** 2)
-        # merged = prev_tail * (1 - gradation) + now_head * gradation
-        overlapped = np.append(merged, now[overlap_length:-overlap_length])
-        signal = np.round(overlapped, decimals=0)
-        signal = signal.astype(np.int16)
-        # signal = signal.astype(np.int16).tobytes()
-        return signal
-
-    def on_request_(self, unpackedData: any):
-
-        self._generate_strength(unpackedData)
-
-        convertSize = 8192
-        unpackedData = unpackedData.astype(np.int16)
-        if hasattr(self, 'stored_raw_input') == False:
-            self.stored_raw_input = unpackedData
-        else:
-            self.stored_raw_input = np.concatenate([self.stored_raw_input, unpackedData])
-
-        self.stored_raw_input = self.stored_raw_input[-1 * (convertSize):]
-        processing_input = self.stored_raw_input
-
-        print("signal_shape1", unpackedData.shape, processing_input.shape, processing_input.dtype)
-        processing_input = processing_input / self.hps.data.max_wav_value
-        print("type:", processing_input.dtype)
-        _f0, _time = pw.dio(processing_input, self.hps.data.sampling_rate, frame_period=5.5)
-        f0 = pw.stonemask(processing_input, _f0, _time, self.hps.data.sampling_rate)
-        f0 = convert_continuos_f0(f0, int(processing_input.shape[0] / self.hps.data.hop_length))
-        f0 = torch.from_numpy(f0.astype(np.float32))
-
-        print("signal_shape2", f0.shape)
-
-        processing_input = torch.from_numpy(processing_input.astype(np.float32)).clone()
-        with torch.no_grad():
-            trans_length = processing_input.size()[0]
-            # spec, sid = get_audio_text_speaker_pair(signal.view(1, trans_length), Hyperparameters.SOURCE_ID)
-            processing_input_v = processing_input.view(1, trans_length)  # same as unsqueeze
-
-            print("processing_input_v shape:", processing_input_v.shape)
-            spec = spectrogram_torch(processing_input_v, self.hps.data.filter_length,
-                                     self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
-                                     center=False)
-            spec = torch.squeeze(spec, 0)
-            sid = torch.LongTensor([int(self.settings.srcId)])
-            dispose_stft_specs = 2
-            spec = spec[:, dispose_stft_specs:-dispose_stft_specs]
-            f0 = f0[dispose_stft_specs:-dispose_stft_specs]
-            print("spec shape:", spec.shape)
-            data = TextAudioSpeakerCollate(
-                sample_rate=self.hps.data.sampling_rate,
-                hop_size=self.hps.data.hop_length,
-                f0_factor=self.settings.f0Factor
-            )([(spec, sid, f0)])
-
-            if self.settings.gpu >= 0 or self.gpu_num > 0:
-                # spec, spec_lengths, sid_src, sin, d = [x.cuda(Hyperparameters.GPU_ID) for x in data]
-                spec, spec_lengths, sid_src, sin, d = data
-                spec = spec.cuda(self.settings.gpu)
-                spec_lengths = spec_lengths.cuda(self.settings.gpu)
-                sid_src = sid_src.cuda(self.settings.gpu)
-                sin = sin.cuda(self.settings.gpu)
-                d = tuple([d[:1].cuda(self.settings.gpu) for d in d])
-                sid_target = torch.LongTensor([self.settings.dstId]).cuda(self.settings.gpu)
-                audio = self.net_g.cuda(self.settings.gpu).voice_conversion(spec, spec_lengths,
-                                                                            sin, d, sid_src, sid_target)[0, 0].data.cpu().float().numpy()
-            else:
-                spec, spec_lengths, sid_src, sin, d = data
-                sid_target = torch.LongTensor([self.settings.dstId])
-                audio = self.net_g.voice_conversion(spec, spec_lengths, sin, d, sid_src, sid_target)[0, 0].data.cpu().float().numpy()
-
-            dispose_conv1d_length = 1280
-            audio = audio[dispose_conv1d_length:-dispose_conv1d_length]
-        audio = audio * self.hps.data.max_wav_value
-        audio = audio.astype(np.int16)
-        print("fin audio shape:", audio.shape)
-        audio = audio.tobytes()
-
-        if hasattr(self, "prev_audio"):
-            try:
-                audio1 = self.overlap_merge(audio, self.prev_audio, 1024)
-            except:
-                audio1 = np.zeros(1).astype(np.int16)
-                pass
-                # return np.zeros(1).astype(np.int16)
-        else:
-            audio1 = np.zeros(1).astype(np.int16)
-
-        self.prev_audio = audio
-        self.out.write(audio)
-        self.stream_in.write(unpackedData.tobytes())
-        # print(audio1)
-
-        return audio1
-
-    def __del__(self):
-        print("DESTRUCTOR")
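The removed overlap_merge used an equal-power cross-fade: the previous chunk's tail is weighted by cos²(g·π/2) and the new chunk's head by cos²((1−g)·π/2), so the two weights always sum to 1 and perceived loudness stays flat across the seam, unlike the commented-out linear fade. A TypeScript sketch of the same blend, kept here for reference since the Python version is deleted in this commit (illustration only):

    // Equal-power cross-fade of a previous tail into a new head.
    // For g in [0, 1): cos^2(g·π/2) + cos^2((1−g)·π/2) = 1, so energy is preserved.
    const overlapMerge = (now: Int16Array, prev: Int16Array, overlap: number): Int16Array => {
        if (overlap === 0) return now
        const out = new Int16Array(now.length - overlap) // merged head + middle, tail dropped
        for (let i = 0; i < overlap; i++) {
            const g = i / overlap
            const fadeOut = Math.cos(g * Math.PI * 0.5) ** 2        // weight for prev tail
            const fadeIn = Math.cos((1 - g) * Math.PI * 0.5) ** 2   // weight for new head
            out[i] = Math.round(prev[prev.length - overlap + i] * fadeOut + now[i] * fadeIn)
        }
        out.set(now.subarray(overlap, now.length - overlap), overlap) // tail is merged on the next call
        return out
    }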
|
Loading…
Reference in New Issue
Block a user