add server mode gain

This commit is contained in:
wataru 2023-05-26 23:53:27 +09:00
parent 39cd234268
commit 6224ec326e
9 changed files with 896 additions and 3481 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,13 +19,13 @@
"author": "wataru.okada@flect.co.jp", "author": "wataru.okada@flect.co.jp",
"license": "ISC", "license": "ISC",
"devDependencies": { "devDependencies": {
"@babel/core": "^7.21.8", "@babel/core": "^7.22.0",
"@babel/plugin-transform-runtime": "^7.21.4", "@babel/plugin-transform-runtime": "^7.22.0",
"@babel/preset-env": "^7.21.5", "@babel/preset-env": "^7.22.0",
"@babel/preset-react": "^7.18.6", "@babel/preset-react": "^7.22.0",
"@babel/preset-typescript": "^7.21.5", "@babel/preset-typescript": "^7.21.5",
"@types/node": "^20.2.3", "@types/node": "^20.2.4",
"@types/react": "^18.2.6", "@types/react": "^18.2.7",
"@types/react-dom": "^18.2.4", "@types/react-dom": "^18.2.4",
"autoprefixer": "^10.4.14", "autoprefixer": "^10.4.14",
"babel-loader": "^9.1.2", "babel-loader": "^9.1.2",
@ -39,7 +39,7 @@
"html-loader": "^4.2.0", "html-loader": "^4.2.0",
"html-webpack-plugin": "^5.5.1", "html-webpack-plugin": "^5.5.1",
"npm-run-all": "^4.1.5", "npm-run-all": "^4.1.5",
"postcss-loader": "^7.3.0", "postcss-loader": "^7.3.1",
"postcss-nested": "^6.0.1", "postcss-nested": "^6.0.1",
"prettier": "^2.8.8", "prettier": "^2.8.8",
"rimraf": "^5.0.1", "rimraf": "^5.0.1",
@ -47,12 +47,12 @@
"ts-loader": "^9.4.3", "ts-loader": "^9.4.3",
"tsconfig-paths": "^4.2.0", "tsconfig-paths": "^4.2.0",
"typescript": "^5.0.4", "typescript": "^5.0.4",
"webpack": "^5.83.1", "webpack": "^5.84.1",
"webpack-cli": "^5.1.1", "webpack-cli": "^5.1.1",
"webpack-dev-server": "^4.15.0" "webpack-dev-server": "^4.15.0"
}, },
"dependencies": { "dependencies": {
"@dannadori/voice-changer-client-js": "^1.0.132", "@dannadori/voice-changer-client-js": "^1.0.135",
"@fortawesome/fontawesome-svg-core": "^6.4.0", "@fortawesome/fontawesome-svg-core": "^6.4.0",
"@fortawesome/free-brands-svg-icons": "^6.4.0", "@fortawesome/free-brands-svg-icons": "^6.4.0",
"@fortawesome/free-regular-svg-icons": "^6.4.0", "@fortawesome/free-regular-svg-icons": "^6.4.0",

View File

@ -9,6 +9,10 @@ export const GainControlRow = (_props: GainControlRowProps) => {
const gainControlRow = useMemo(() => { const gainControlRow = useMemo(() => {
if (appState.serverSetting.serverSetting.enableServerAudio == 1) {
return <></>
}
return ( return (
<div className="body-row split-3-2-2-3 left-padding-1 guided"> <div className="body-row split-3-2-2-3 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Gain Control</div> <div className="body-item-title left-padding-1 ">Gain Control</div>
@ -33,8 +37,48 @@ export const GainControlRow = (_props: GainControlRowProps) => {
}, [ }, [
appState.clientSetting.clientSetting.inputGain, appState.clientSetting.clientSetting.inputGain,
appState.clientSetting.clientSetting.outputGain, appState.clientSetting.clientSetting.outputGain,
appState.clientSetting.updateClientSetting appState.clientSetting.updateClientSetting,
appState.serverSetting.serverSetting.enableServerAudio
]) ])
return gainControlRow
const serverGainControlRow = useMemo(() => {
if (appState.serverSetting.serverSetting.enableServerAudio == 0) {
return <></>
}
return (
<div className="body-row split-3-2-2-3 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Server Gain Control</div>
<div>
<span className="body-item-input-slider-label">in</span>
<input type="range" className="body-item-input-slider" min="0.1" max="10.0" step="0.1" value={appState.serverSetting.serverSetting.serverInputAudioGain} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, serverInputAudioGain: Number(e.target.value) })
}}></input>
<span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.serverInputAudioGain}</span>
</div>
<div>
<span className="body-item-input-slider-label">out</span>
<input type="range" className="body-item-input-slider" min="0.1" max="10.0" step="0.1" value={appState.serverSetting.serverSetting.serverOutputAudioGain} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, serverOutputAudioGain: Number(e.target.value) })
}}></input>
<span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.serverOutputAudioGain}</span>
</div>
<div className="body-button-container">
</div>
</div>
)
}, [
appState.serverSetting.serverSetting.serverInputAudioGain,
appState.serverSetting.serverSetting.serverOutputAudioGain,
appState.serverSetting.updateServerSettings,
appState.serverSetting.serverSetting.enableServerAudio
])
return (
<>
{gainControlRow}
{serverGainControlRow}
</>
)
} }

View File

@ -1,12 +1,12 @@
{ {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.134", "version": "1.0.135",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.134", "version": "1.0.135",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@types/readable-stream": "^2.3.15", "@types/readable-stream": "^2.3.15",

View File

@ -1,6 +1,6 @@
{ {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.134", "version": "1.0.135",
"description": "", "description": "",
"main": "dist/index.js", "main": "dist/index.js",
"directories": { "directories": {

View File

@ -107,6 +107,8 @@ export const ServerSettingKey = {
"serverInputDeviceId": "serverInputDeviceId", "serverInputDeviceId": "serverInputDeviceId",
"serverOutputDeviceId": "serverOutputDeviceId", "serverOutputDeviceId": "serverOutputDeviceId",
"serverReadChunkSize": "serverReadChunkSize", "serverReadChunkSize": "serverReadChunkSize",
"serverInputAudioGain": "serverInputAudioGain",
"serverOutputAudioGain": "serverOutputAudioGain",
"tran": "tran", "tran": "tran",
"noiseScale": "noiseScale", "noiseScale": "noiseScale",
@ -162,6 +164,8 @@ export type VoiceChangerServerSetting = {
serverInputDeviceId: number serverInputDeviceId: number
serverOutputDeviceId: number serverOutputDeviceId: number
serverReadChunkSize: number serverReadChunkSize: number
serverInputAudioGain: number
serverOutputAudioGain: number
tran: number // so-vits-svc tran: number // so-vits-svc
@ -268,6 +272,8 @@ export const DefaultServerSetting: ServerInfo = {
serverInputDeviceId: -1, serverInputDeviceId: -1,
serverOutputDeviceId: -1, serverOutputDeviceId: -1,
serverReadChunkSize: 256, serverReadChunkSize: 256,
serverInputAudioGain: 1.0,
serverOutputAudioGain: 1.0,
// VC Specific // VC Specific
srcId: 0, srcId: 0,

View File

@ -60,11 +60,13 @@ class VoiceChangerSettings:
# serverOutputAudioSampleRate: int = 48000 # serverOutputAudioSampleRate: int = 48000
serverInputAudioSampleRate: int = 44100 serverInputAudioSampleRate: int = 44100
serverOutputAudioSampleRate: int = 44100 serverOutputAudioSampleRate: int = 44100
serverInputAudioBufferSize: int = 1024 * 24 # serverInputAudioBufferSize: int = 1024 * 24
serverOutputAudioBufferSize: int = 1024 * 24 # serverOutputAudioBufferSize: int = 1024 * 24
serverInputDeviceId: int = -1 serverInputDeviceId: int = -1
serverOutputDeviceId: int = -1 serverOutputDeviceId: int = -1
serverReadChunkSize: int = 256 serverReadChunkSize: int = 256
serverInputAudioGain: float = 1.0
serverOutputAudioGain: float = 1.0
performance: list[int] = field(default_factory=lambda: [0, 0, 0, 0]) performance: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
# ↓mutableな物だけ列挙 # ↓mutableな物だけ列挙
@ -77,15 +79,20 @@ class VoiceChangerSettings:
"serverAudioStated", "serverAudioStated",
"serverInputAudioSampleRate", "serverInputAudioSampleRate",
"serverOutputAudioSampleRate", "serverOutputAudioSampleRate",
"serverInputAudioBufferSize", # "serverInputAudioBufferSize",
"serverOutputAudioBufferSize", # "serverOutputAudioBufferSize",
"serverInputDeviceId", "serverInputDeviceId",
"serverOutputDeviceId", "serverOutputDeviceId",
"serverReadChunkSize", "serverReadChunkSize",
] ]
) )
floatData: list[str] = field( floatData: list[str] = field(
default_factory=lambda: ["crossFadeOffsetRate", "crossFadeEndRate"] default_factory=lambda: [
"crossFadeOffsetRate",
"crossFadeEndRate",
"serverInputAudioGain",
"serverOutputAudioGain",
]
) )
strData: list[str] = field(default_factory=lambda: []) strData: list[str] = field(default_factory=lambda: [])
@ -105,6 +112,7 @@ class VoiceChanger:
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
): ):
try: try:
indata = indata * self.settings.serverInputAudioGain
with Timer("all_inference_time") as t: with Timer("all_inference_time") as t:
unpackedData = librosa.to_mono(indata.T) * 32768.0 unpackedData = librosa.to_mono(indata.T) * 32768.0
out_wav, times = self.on_request(unpackedData) out_wav, times = self.on_request(unpackedData)
@ -113,6 +121,7 @@ class VoiceChanger:
np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
/ 32768.0 / 32768.0
) )
outdata[:] = outdata * self.settings.serverOutputAudioGain
all_inference_time = t.secs all_inference_time = t.secs
performance = [all_inference_time] + times performance = [all_inference_time] + times
if self.emitTo is not None: if self.emitTo is not None: