MMVCv15: max chunksize for onnx
This commit is contained in:
w-okada 2023-07-09 05:19:30 +09:00
parent 514ae768b7
commit 728e7bcca1
10 changed files with 651 additions and 4678 deletions

View File

@ -1,10 +1 @@
<!DOCTYPE html> <!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

31
client/demo/dist/index.js.LICENSE.txt vendored Normal file
View File

@ -0,0 +1,31 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

File diff suppressed because it is too large Load Diff

View File

@ -21,16 +21,16 @@
"author": "wataru.okada@flect.co.jp", "author": "wataru.okada@flect.co.jp",
"license": "ISC", "license": "ISC",
"devDependencies": { "devDependencies": {
"@babel/core": "^7.22.6", "@babel/core": "^7.22.8",
"@babel/plugin-transform-runtime": "^7.22.6", "@babel/plugin-transform-runtime": "^7.22.7",
"@babel/preset-env": "^7.22.6", "@babel/preset-env": "^7.22.7",
"@babel/preset-react": "^7.22.5", "@babel/preset-react": "^7.22.5",
"@babel/preset-typescript": "^7.22.5", "@babel/preset-typescript": "^7.22.5",
"@types/node": "^20.3.3", "@types/node": "^20.4.1",
"@types/react": "^18.2.14", "@types/react": "^18.2.14",
"@types/react-dom": "^18.2.6", "@types/react-dom": "^18.2.6",
"autoprefixer": "^10.4.14", "autoprefixer": "^10.4.14",
"babel-loader": "^9.1.2", "babel-loader": "^9.1.3",
"copy-webpack-plugin": "^11.0.0", "copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1", "css-loader": "^6.8.1",
"eslint": "^8.44.0", "eslint": "^8.44.0",
@ -43,7 +43,7 @@
"npm-run-all": "^4.1.5", "npm-run-all": "^4.1.5",
"postcss-loader": "^7.3.3", "postcss-loader": "^7.3.3",
"postcss-nested": "^6.0.1", "postcss-nested": "^6.0.1",
"prettier": "^2.8.8", "prettier": "^3.0.0",
"rimraf": "^5.0.1", "rimraf": "^5.0.1",
"style-loader": "^3.3.3", "style-loader": "^3.3.3",
"ts-loader": "^9.4.4", "ts-loader": "^9.4.4",
@ -54,7 +54,7 @@
"webpack-dev-server": "^4.15.1" "webpack-dev-server": "^4.15.1"
}, },
"dependencies": { "dependencies": {
"@dannadori/voice-changer-client-js": "^1.0.159", "@dannadori/voice-changer-client-js": "^1.0.160",
"@fortawesome/fontawesome-svg-core": "^6.4.0", "@fortawesome/fontawesome-svg-core": "^6.4.0",
"@fortawesome/free-brands-svg-icons": "^6.4.0", "@fortawesome/free-brands-svg-icons": "^6.4.0",
"@fortawesome/free-regular-svg-icons": "^6.4.0", "@fortawesome/free-regular-svg-icons": "^6.4.0",

View File

@ -17,6 +17,11 @@ export const ConvertArea = (props: ConvertProps) => {
} else { } else {
nums = props.inputChunkNums; nums = props.inputChunkNums;
} }
if (serverSetting.serverSetting.maxInputLength) {
nums = nums.filter((x) => {
return x < serverSetting.serverSetting.maxInputLength / 128;
});
}
const gpusEntry = [...serverSetting.serverSetting.gpus]; const gpusEntry = [...serverSetting.serverSetting.gpus];
gpusEntry.push({ gpusEntry.push({

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.159", "version": "1.0.160",
"description": "", "description": "",
"main": "dist/index.js", "main": "dist/index.js",
"directories": { "directories": {
@ -27,16 +27,16 @@
"license": "ISC", "license": "ISC",
"devDependencies": { "devDependencies": {
"@types/audioworklet": "^0.0.48", "@types/audioworklet": "^0.0.48",
"@types/node": "^20.3.3", "@types/node": "^20.4.1",
"@types/react": "18.2.14", "@types/react": "18.2.14",
"@types/react-dom": "18.2.6", "@types/react-dom": "18.2.6",
"eslint": "^8.44.0", "eslint": "^8.44.0",
"eslint-config-prettier": "^8.8.0", "eslint-config-prettier": "^8.8.0",
"eslint-plugin-prettier": "^4.2.1", "eslint-plugin-prettier": "^4.2.1",
"eslint-plugin-react": "^7.32.2", "eslint-plugin-react": "^7.25.3",
"eslint-webpack-plugin": "^4.0.1", "eslint-webpack-plugin": "^4.0.1",
"npm-run-all": "^4.1.5", "npm-run-all": "^4.1.2",
"prettier": "^2.8.8", "prettier": "^3.0.0",
"raw-loader": "^4.0.2", "raw-loader": "^4.0.2",
"rimraf": "^5.0.1", "rimraf": "^5.0.1",
"ts-loader": "^9.4.4", "ts-loader": "^9.4.4",
@ -47,7 +47,7 @@
}, },
"dependencies": { "dependencies": {
"@types/readable-stream": "^2.3.15", "@types/readable-stream": "^2.3.15",
"amazon-chime-sdk-js": "^3.15.0", "amazon-chime-sdk-js": "^2.7.0",
"buffer": "^6.0.3", "buffer": "^6.0.3",
"localforage": "^1.10.0", "localforage": "^1.10.0",
"react": "^18.2.0", "react": "^18.2.0",

View File

@ -280,6 +280,7 @@ export type ServerInfo = VoiceChangerServerSetting & {
name: string, name: string,
memory: number, memory: number,
}[] }[]
maxInputLength: number // MMVCv15
} }
@ -365,7 +366,9 @@ export const DefaultServerSetting: ServerInfo = {
status: "ok", status: "ok",
modelSlots: [], modelSlots: [],
serverAudioInputDevices: [], serverAudioInputDevices: [],
serverAudioOutputDevices: [] serverAudioOutputDevices: [],
maxInputLength: 128 * 2048
} }
/////////////////////// ///////////////////////

View File

@ -47,6 +47,8 @@ class MMVCv15Settings:
f0Factor: float = 1.0 f0Factor: float = 1.0
f0Detector: str = "dio" # dio or harvest f0Detector: str = "dio" # dio or harvest
maxInputLength: int = 1024
# ↓mutableな物だけ列挙 # ↓mutableな物だけ列挙
intData = ["gpu", "srcId", "dstId"] intData = ["gpu", "srcId", "dstId"]
floatData = ["f0Factor"] floatData = ["f0Factor"]
@ -88,6 +90,7 @@ class MMVCv15:
requires_grad_text_enc=self.hps.requires_grad.text_enc, requires_grad_text_enc=self.hps.requires_grad.text_enc,
requires_grad_dec=self.hps.requires_grad.dec, requires_grad_dec=self.hps.requires_grad.dec,
) )
self.settings.maxInputLength = 128 * 2048 # Torchの時は無制限。とりあえずでかい値で初期化
if self.slotInfo.isONNX: if self.slotInfo.isONNX:
self.onxx_input_length = 8192 self.onxx_input_length = 8192
@ -102,6 +105,7 @@ class MMVCv15:
# print("ONNX INPUT SHAPE", i.name, i.shape) # print("ONNX INPUT SHAPE", i.name, i.shape)
if i.name == "sin": if i.name == "sin":
self.onxx_input_length = i.shape[2] self.onxx_input_length = i.shape[2]
self.settings.maxInputLength = self.onxx_input_length - (0.012 * self.hps.data.sampling_rate) - 1024 # onnxの場合は入力長固定(crossfadeの1024は仮) # NOQA
else: else:
self.net_g.eval() self.net_g.eval()
load_checkpoint(self.slotInfo.modelFile, self.net_g, None) load_checkpoint(self.slotInfo.modelFile, self.net_g, None)
@ -144,6 +148,7 @@ class MMVCv15:
for i in inputs_info: for i in inputs_info:
if i.name == "sin": if i.name == "sin":
self.onxx_input_length = i.shape[2] self.onxx_input_length = i.shape[2]
self.settings.maxInputLength = self.onxx_input_length - (0.012 * self.hps.data.sampling_rate) - 1024 # onnxの場合は入力長固定(crossfadeの1024は仮) # NOQA
elif key in self.settings.floatData: elif key in self.settings.floatData:
setattr(self.settings, key, float(val)) setattr(self.settings, key, float(val))
elif key in self.settings.strData: elif key in self.settings.strData:
@ -202,6 +207,9 @@ class MMVCv15:
crossfadeSize: int, crossfadeSize: int,
solaSearchFrame: int = 0, solaSearchFrame: int = 0,
): ):
# maxInputLength を更新(ここでやると非効率だが、とりあえず。)
self.settings.maxInputLength = self.onxx_input_length - crossfadeSize - solaSearchFrame # onnxの場合は入力長固定(crossfadeSizeとsolaSearchFrameを差し引く) # NOQA
newData = newData.astype(np.float32) / self.hps.data.max_wav_value newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if self.audio_buffer is not None: if self.audio_buffer is not None: