add server mode gain

This commit is contained in:
wataru 2023-05-26 23:53:27 +09:00
parent 39cd234268
commit 6224ec326e
9 changed files with 896 additions and 3481 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,13 +19,13 @@
"author": "wataru.okada@flect.co.jp",
"license": "ISC",
"devDependencies": {
"@babel/core": "^7.21.8",
"@babel/plugin-transform-runtime": "^7.21.4",
"@babel/preset-env": "^7.21.5",
"@babel/preset-react": "^7.18.6",
"@babel/core": "^7.22.0",
"@babel/plugin-transform-runtime": "^7.22.0",
"@babel/preset-env": "^7.22.0",
"@babel/preset-react": "^7.22.0",
"@babel/preset-typescript": "^7.21.5",
"@types/node": "^20.2.3",
"@types/react": "^18.2.6",
"@types/node": "^20.2.4",
"@types/react": "^18.2.7",
"@types/react-dom": "^18.2.4",
"autoprefixer": "^10.4.14",
"babel-loader": "^9.1.2",
@ -39,7 +39,7 @@
"html-loader": "^4.2.0",
"html-webpack-plugin": "^5.5.1",
"npm-run-all": "^4.1.5",
"postcss-loader": "^7.3.0",
"postcss-loader": "^7.3.1",
"postcss-nested": "^6.0.1",
"prettier": "^2.8.8",
"rimraf": "^5.0.1",
@ -47,12 +47,12 @@
"ts-loader": "^9.4.3",
"tsconfig-paths": "^4.2.0",
"typescript": "^5.0.4",
"webpack": "^5.83.1",
"webpack": "^5.84.1",
"webpack-cli": "^5.1.1",
"webpack-dev-server": "^4.15.0"
},
"dependencies": {
"@dannadori/voice-changer-client-js": "^1.0.132",
"@dannadori/voice-changer-client-js": "^1.0.135",
"@fortawesome/fontawesome-svg-core": "^6.4.0",
"@fortawesome/free-brands-svg-icons": "^6.4.0",
"@fortawesome/free-regular-svg-icons": "^6.4.0",

View File

@ -9,6 +9,10 @@ export const GainControlRow = (_props: GainControlRowProps) => {
const gainControlRow = useMemo(() => {
if (appState.serverSetting.serverSetting.enableServerAudio == 1) {
return <></>
}
return (
<div className="body-row split-3-2-2-3 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Gain Control</div>
@ -33,8 +37,48 @@ export const GainControlRow = (_props: GainControlRowProps) => {
}, [
appState.clientSetting.clientSetting.inputGain,
appState.clientSetting.clientSetting.outputGain,
appState.clientSetting.updateClientSetting
appState.clientSetting.updateClientSetting,
appState.serverSetting.serverSetting.enableServerAudio
])
return gainControlRow
// Row with the "Server Gain Control" sliders (input and output gain).
// It renders an empty fragment when `enableServerAudio` is 0, and otherwise two
// range inputs (0.1 – 10.0, step 0.1) bound to `serverInputAudioGain` and
// `serverOutputAudioGain`.
//
// Each change handler sends the whole current server-setting object with one
// field overridden. Because the handlers spread that object, the object itself
// must be a memo dependency; otherwise a change to any other server setting
// would leave the handlers holding a stale snapshot, and the next slider move
// would write the outdated values back.
const serverGainControlRow = useMemo(() => {
    // Loose comparison kept on purpose: the runtime type of the flag is not visible here.
    if (appState.serverSetting.serverSetting.enableServerAudio == 0) {
        return <></>
    }

    return (
        <div className="body-row split-3-2-2-3 left-padding-1 guided">
            <div className="body-item-title left-padding-1 ">Server Gain Control</div>
            <div>
                <span className="body-item-input-slider-label">in</span>
                <input type="range" className="body-item-input-slider" min="0.1" max="10.0" step="0.1" value={appState.serverSetting.serverSetting.serverInputAudioGain} onChange={(e) => {
                    appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, serverInputAudioGain: Number(e.target.value) })
                }}></input>
                <span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.serverInputAudioGain}</span>
            </div>
            <div>
                <span className="body-item-input-slider-label">out</span>
                <input type="range" className="body-item-input-slider" min="0.1" max="10.0" step="0.1" value={appState.serverSetting.serverSetting.serverOutputAudioGain} onChange={(e) => {
                    appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, serverOutputAudioGain: Number(e.target.value) })
                }}></input>
                <span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.serverOutputAudioGain}</span>
            </div>
            <div className="body-button-container">
            </div>
        </div>
    )
}, [
    // The full settings object is spread inside the handlers, so it must be tracked.
    appState.serverSetting.serverSetting,
    appState.serverSetting.serverSetting.serverInputAudioGain,
    appState.serverSetting.serverSetting.serverOutputAudioGain,
    appState.serverSetting.updateServerSettings,
    appState.serverSetting.serverSetting.enableServerAudio
])
return (
<>
{gainControlRow}
{serverGainControlRow}
</>
)
}

View File

@ -1,12 +1,12 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.134",
"version": "1.0.135",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.134",
"version": "1.0.135",
"license": "ISC",
"dependencies": {
"@types/readable-stream": "^2.3.15",

View File

@ -1,6 +1,6 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.134",
"version": "1.0.135",
"description": "",
"main": "dist/index.js",
"directories": {

View File

@ -107,6 +107,8 @@ export const ServerSettingKey = {
"serverInputDeviceId": "serverInputDeviceId",
"serverOutputDeviceId": "serverOutputDeviceId",
"serverReadChunkSize": "serverReadChunkSize",
"serverInputAudioGain": "serverInputAudioGain",
"serverOutputAudioGain": "serverOutputAudioGain",
"tran": "tran",
"noiseScale": "noiseScale",
@ -162,6 +164,8 @@ export type VoiceChangerServerSetting = {
serverInputDeviceId: number
serverOutputDeviceId: number
serverReadChunkSize: number
serverInputAudioGain: number
serverOutputAudioGain: number
tran: number // so-vits-svc
@ -268,6 +272,8 @@ export const DefaultServerSetting: ServerInfo = {
serverInputDeviceId: -1,
serverOutputDeviceId: -1,
serverReadChunkSize: 256,
serverInputAudioGain: 1.0,
serverOutputAudioGain: 1.0,
// VC Specific
srcId: 0,

View File

@ -60,11 +60,13 @@ class VoiceChangerSettings:
# serverOutputAudioSampleRate: int = 48000
serverInputAudioSampleRate: int = 44100
serverOutputAudioSampleRate: int = 44100
serverInputAudioBufferSize: int = 1024 * 24
serverOutputAudioBufferSize: int = 1024 * 24
# serverInputAudioBufferSize: int = 1024 * 24
# serverOutputAudioBufferSize: int = 1024 * 24
serverInputDeviceId: int = -1
serverOutputDeviceId: int = -1
serverReadChunkSize: int = 256
serverInputAudioGain: float = 1.0
serverOutputAudioGain: float = 1.0
performance: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
# ↓ list only the mutable settings
@ -77,15 +79,20 @@ class VoiceChangerSettings:
"serverAudioStated",
"serverInputAudioSampleRate",
"serverOutputAudioSampleRate",
"serverInputAudioBufferSize",
"serverOutputAudioBufferSize",
# "serverInputAudioBufferSize",
# "serverOutputAudioBufferSize",
"serverInputDeviceId",
"serverOutputDeviceId",
"serverReadChunkSize",
]
)
floatData: list[str] = field(
default_factory=lambda: ["crossFadeOffsetRate", "crossFadeEndRate"]
default_factory=lambda: [
"crossFadeOffsetRate",
"crossFadeEndRate",
"serverInputAudioGain",
"serverOutputAudioGain",
]
)
strData: list[str] = field(default_factory=lambda: [])
@ -105,6 +112,7 @@ class VoiceChanger:
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
):
try:
indata = indata * self.settings.serverInputAudioGain
with Timer("all_inference_time") as t:
unpackedData = librosa.to_mono(indata.T) * 32768.0
out_wav, times = self.on_request(unpackedData)
@ -113,6 +121,7 @@ class VoiceChanger:
np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
/ 32768.0
)
outdata[:] = outdata * self.settings.serverOutputAudioGain
all_inference_time = t.secs
performance = [all_inference_time] + times
if self.emitTo is not None: