w-okada 2024-02-28 23:08:49 +09:00
parent 39e0d0cfd6
commit bc6e8a9c08
35 changed files with 3520 additions and 1896 deletions

View File

@@ -21,7 +21,7 @@
{
"name": "configArea",
"options": {
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx", "fcpe" ],
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx", "fcpe"],
"inputChunkNums": [1, 2, 4, 6, 8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048, 4096, 8192, 16384]
}
}
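
For orientation: this hunk only removes a stray space before the closing bracket of the detectors array. The configArea block enumerates the F0 detectors and chunk sizes the GUI offers; below is a minimal sketch of how such a block could be typed and narrowed when the JSON is loaded. All names in the sketch are illustrative, not from the repository.

```ts
// Hypothetical types for the configArea options above; names are illustrative.
type F0DetectorName =
    | "dio" | "harvest" | "crepe" | "crepe_full"
    | "crepe_tiny" | "rmvpe" | "rmvpe_onnx" | "fcpe";

type ConfigAreaOptions = {
    detectors: F0DetectorName[];
    inputChunkNums: number[];
};

// Narrow an untyped JSON payload, dropping detector names the client does not know.
const parseConfigArea = (raw: { options: { detectors: string[]; inputChunkNums: number[] } }): ConfigAreaOptions => {
    const known: readonly string[] = ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx", "fcpe"];
    const detectors = raw.options.detectors.filter((d): d is F0DetectorName => known.includes(d));
    return { detectors, inputChunkNums: raw.options.inputChunkNums };
};
```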

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@@ -28,41 +28,41 @@
"author": "wataru.okada@flect.co.jp",
"license": "ISC",
"devDependencies": {
"@babel/core": "^7.23.5",
"@babel/plugin-transform-runtime": "^7.23.4",
"@babel/preset-env": "^7.23.5",
"@babel/core": "^7.24.0",
"@babel/plugin-transform-runtime": "^7.24.0",
"@babel/preset-env": "^7.24.0",
"@babel/preset-react": "^7.23.3",
"@babel/preset-typescript": "^7.23.3",
"@types/node": "^20.10.2",
"@types/react": "^18.2.40",
"@types/react-dom": "^18.2.17",
"autoprefixer": "^10.4.16",
"@types/node": "^20.11.21",
"@types/react": "^18.2.60",
"@types/react-dom": "^18.2.19",
"autoprefixer": "^10.4.17",
"babel-loader": "^9.1.3",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1",
"eslint": "^8.55.0",
"copy-webpack-plugin": "^12.0.2",
"css-loader": "^6.10.0",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.0.1",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"html-loader": "^4.2.0",
"html-webpack-plugin": "^5.5.3",
"html-loader": "^5.0.0",
"html-webpack-plugin": "^5.6.0",
"npm-run-all": "^4.1.5",
"postcss-loader": "^7.3.3",
"postcss-loader": "^8.1.1",
"postcss-nested": "^6.0.1",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"rimraf": "^5.0.5",
"style-loader": "^3.3.3",
"style-loader": "^3.3.4",
"ts-loader": "^9.5.1",
"tsconfig-paths": "^4.2.0",
"typescript": "^5.3.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
},
"dependencies": {
"@alexanderolsen/libsamplerate-js": "^2.1.0",
"@dannadori/voice-changer-client-js": "^1.0.180",
"@alexanderolsen/libsamplerate-js": "^2.1.1",
"@dannadori/voice-changer-client-js": "^1.0.182",
"@dannadori/voice-changer-js": "^1.0.2",
"@dannadori/worker-manager": "^1.0.20",
"@fortawesome/fontawesome-svg-core": "^6.5.1",
@@ -70,9 +70,9 @@
"@fortawesome/free-regular-svg-icons": "^6.5.1",
"@fortawesome/free-solid-svg-icons": "^6.5.1",
"@fortawesome/react-fontawesome": "^0.2.0",
"@tensorflow/tfjs": "^4.14.0",
"onnxruntime-web": "^1.16.3",
"protobufjs": "^7.2.5",
"@tensorflow/tfjs": "^4.17.0",
"onnxruntime-web": "^1.17.1",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0"
}

View File

@@ -125,6 +125,11 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
return x.kind == "llvcConfig";
});
return enough;
} else if (setting.voiceChangerType == "EasyVC") {
const enough = !!setting.files.find((x) => {
return x.kind == "easyVCModel";
});
return enough;
}
return false;
};
@@ -189,6 +194,8 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
} else if (vcType == "LLVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "llvcModel", ["pth"]));
rows.push(generateFileRow(uploadSetting!, "Config", "llvcConfig", ["json"]));
} else if (vcType == "EasyVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "easyVCModel", ["onnx"]));
}
return rows;
};
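
The EasyVC wiring touches the same two places as every other model type: a sufficiency check over the selected files and a generateFileRow branch for the upload UI. The per-type checks can also be written table-driven; the sketch below is illustrative only, and the RequiredKinds map is not code from this commit.

```ts
// Illustrative: required file kinds per voice changer type, mirroring the
// per-type branches shown in the two hunks above (subset of types only).
const RequiredKinds: { [vcType: string]: string[] } = {
    LLVC: ["llvcModel", "llvcConfig"],
    EasyVC: ["easyVCModel"],
};

// A selection is sufficient when every required kind appears among the files.
const filesAreEnough = (vcType: string, files: { kind: string }[]): boolean => {
    const required = RequiredKinds[vcType];
    if (!required) return false;
    return required.every((kind) => files.some((f) => f.kind === kind));
};
```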

View File

@@ -1,44 +1,44 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.180",
"version": "1.0.182",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.180",
"version": "1.0.182",
"license": "ISC",
"dependencies": {
"@types/readable-stream": "^4.0.9",
"amazon-chime-sdk-js": "^3.18.2",
"@types/readable-stream": "^4.0.10",
"amazon-chime-sdk-js": "^3.20.0",
"buffer": "^6.0.3",
"localforage": "^1.10.0",
"protobufjs": "^7.2.5",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"socket.io-client": "^4.7.2"
"socket.io-client": "^4.7.4"
},
"devDependencies": {
"@types/audioworklet": "^0.0.52",
"@types/jest": "^29.5.10",
"@types/node": "^20.10.2",
"@types/react": "18.2.40",
"@types/react-dom": "18.2.17",
"eslint": "^8.55.0",
"@types/audioworklet": "^0.0.54",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.21",
"@types/react": "18.2.60",
"@types/react-dom": "18.2.19",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.0.1",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"jest": "^29.7.0",
"npm-run-all": "^4.1.5",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"raw-loader": "^4.0.2",
"rimraf": "^5.0.5",
"ts-loader": "^9.5.1",
"typescript": "^5.3.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
}
},
"node_modules/@aashutoshrathi/word-wrap": {
@@ -1585,22 +1585,22 @@
}
},
"node_modules/@eslint/js": {
"version": "8.55.0",
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.55.0.tgz",
"integrity": "sha512-qQfo2mxH5yVom1kacMtZZJFVdW+E70mqHMJvVg6WTLo+VBuQJ4TojZlfWBjK0ve5BdEeNAVxOsl/nvNMpJOaJA==",
"version": "8.57.0",
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.57.0.tgz",
"integrity": "sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g==",
"dev": true,
"engines": {
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
}
},
"node_modules/@humanwhocodes/config-array": {
"version": "0.11.13",
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.13.tgz",
"integrity": "sha512-JSBDMiDKSzQVngfRjOdFXgFfklaXI4K9nLF49Auh21lmBWRLIK3+xTErTWD4KU54pb6coM6ESE7Awz/FNU3zgQ==",
"version": "0.11.14",
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.14.tgz",
"integrity": "sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg==",
"dev": true,
"dependencies": {
"@humanwhocodes/object-schema": "^2.0.1",
"debug": "^4.1.1",
"@humanwhocodes/object-schema": "^2.0.2",
"debug": "^4.3.1",
"minimatch": "^3.0.5"
},
"engines": {
@@ -1621,9 +1621,9 @@
}
},
"node_modules/@humanwhocodes/object-schema": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.1.tgz",
"integrity": "sha512-dvuCeX5fC9dXgJn9t+X5atfmgQAzUOWqS1254Gh0m6i8wKd10ebXkfNKiRK+1GWi/yTvvLDHpoxLr0xxxeslWw==",
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.2.tgz",
"integrity": "sha512-6EwiSjwWYP7pTckG6I5eyFANjPhmPjUX9JRLUSfNPC7FX7zK9gyZAfUEaECL6ALTpGX5AjnBq3C9XmVWPitNpw==",
"dev": true
},
"node_modules/@isaacs/cliui": {
@@ -3051,9 +3051,9 @@
"integrity": "sha512-+9jVqKhRSpsc591z5vX+X5Yyw+he/HCB4iQ/RYxw35CEPaY1gnsNE43nf9n9AaYjAQrTiI/mOwKUKdUs9vf7Xg=="
},
"node_modules/@types/audioworklet": {
"version": "0.0.52",
"resolved": "https://registry.npmjs.org/@types/audioworklet/-/audioworklet-0.0.52.tgz",
"integrity": "sha512-+C0QA8HJS07NjSdLFUDsSfUGJiLs+FPa6K7Tu/e76dqHEnuTOjAjDyiFOnZTuf9j4x9P8Nmv0OOfcMNYnGzbAQ==",
"version": "0.0.54",
"resolved": "https://registry.npmjs.org/@types/audioworklet/-/audioworklet-0.0.54.tgz",
"integrity": "sha512-WR1XcwT2LhCaUiKDDgHdTjrVjoBZnTz6FhszeIKgY9i2UYfIRKtnNvqToUDnbCPXBpVuu4Qo5+mMJt+wDphRew==",
"dev": true
},
"node_modules/@types/babel__core": {
@@ -3234,9 +3234,9 @@
}
},
"node_modules/@types/jest": {
"version": "29.5.10",
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.10.tgz",
"integrity": "sha512-tE4yxKEphEyxj9s4inideLHktW/x6DwesIwWZ9NN1FKf9zbJYsnhBoA9vrHA/IuIOKwPa5PcFBNV4lpMIOEzyQ==",
"version": "29.5.12",
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
"dev": true,
"dependencies": {
"expect": "^29.0.0",
@@ -3256,9 +3256,9 @@
"dev": true
},
"node_modules/@types/node": {
"version": "20.10.2",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.2.tgz",
"integrity": "sha512-37MXfxkb0vuIlRKHNxwCkb60PNBpR94u4efQuN4JgIAm66zfCDXGSAFCef9XUWFovX2R1ok6Z7MHhtdVXXkkIw==",
"version": "20.11.21",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.21.tgz",
"integrity": "sha512-/ySDLGscFPNasfqStUuWWPfL78jompfIoVzLJPVVAHBh6rpG68+pI2Gk+fNLeI8/f1yPYL4s46EleVIc20F1Ow==",
"dependencies": {
"undici-types": "~5.26.4"
}
@@ -3291,9 +3291,9 @@
"dev": true
},
"node_modules/@types/react": {
"version": "18.2.40",
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.2.40.tgz",
"integrity": "sha512-H+BUhb9C1zBtogDLAk+KCNRKiHDrqSwQT/0z0PVTwMFBxqg3011ByLomADtgkgMkfwj4AMOiXBReyLTUBg681g==",
"version": "18.2.60",
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.2.60.tgz",
"integrity": "sha512-dfiPj9+k20jJrLGOu9Nf6eqxm2EyJRrq2NvwOFsfbb7sFExZ9WELPs67UImHj3Ayxg8ruTtKtNnbjaF8olPq0A==",
"dev": true,
"dependencies": {
"@types/prop-types": "*",
@@ -3302,27 +3302,27 @@
}
},
"node_modules/@types/react-dom": {
"version": "18.2.17",
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.2.17.tgz",
"integrity": "sha512-rvrT/M7Df5eykWFxn6MYt5Pem/Dbyc1N8Y0S9Mrkw2WFCRiqUgw9P7ul2NpwsXCSM1DVdENzdG9J5SreqfAIWg==",
"version": "18.2.19",
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.2.19.tgz",
"integrity": "sha512-aZvQL6uUbIJpjZk4U8JZGbau9KDeAwMfmhyWorxgBkqDIEf6ROjRozcmPIicqsUwPUjbkDfHKgGee1Lq65APcA==",
"dev": true,
"dependencies": {
"@types/react": "*"
}
},
"node_modules/@types/readable-stream": {
"version": "4.0.9",
"resolved": "https://registry.npmjs.org/@types/readable-stream/-/readable-stream-4.0.9.tgz",
"integrity": "sha512-4cwuvrmNF96M4Nrx0Eep37RwPB1Mth+nCSezsGRv5+PsFyRvDdLd0pil6gVLcWD/bh69INNdwZ98dJwfHpLohA==",
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/@types/readable-stream/-/readable-stream-4.0.10.tgz",
"integrity": "sha512-AbUKBjcC8SHmImNi4yK2bbjogQlkFSg7shZCcicxPQapniOlajG8GCc39lvXzCWX4lLRRs7DM3VAeSlqmEVZUA==",
"dependencies": {
"@types/node": "*",
"safe-buffer": "~5.1.1"
}
},
"node_modules/@types/retry": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
"integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==",
"version": "0.12.2",
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.2.tgz",
"integrity": "sha512-XISRgDJ2Tc5q4TRqvgJtzsRkFYNJzZrhTdtMoGVBttwzzQJkPnS3WWTFc7kuDRoPtPakl+T+OfdEUjYJj7Jbow==",
"dev": true
},
"node_modules/@types/scheduler": {
@@ -3721,9 +3721,9 @@
}
},
"node_modules/amazon-chime-sdk-js": {
"version": "3.18.2",
"resolved": "https://registry.npmjs.org/amazon-chime-sdk-js/-/amazon-chime-sdk-js-3.18.2.tgz",
"integrity": "sha512-w0O/X8NG+i7y6hS+iQOH0Yn1szkfFDyJDrFTtuZ81Ygd6832Ht1MLmNFf5HuaQzhqVve48W/fAtfcRYOcavIeg==",
"version": "3.20.0",
"resolved": "https://registry.npmjs.org/amazon-chime-sdk-js/-/amazon-chime-sdk-js-3.20.0.tgz",
"integrity": "sha512-VfWKwFQEp78pO2kRvCHsQ9Df/cwk0F6OVzFHlJ8pqtKCw8NIeFJ0zM3Yv1vyfU6N35nXg41APld/sTfvLsotCA==",
"dependencies": {
"@aws-crypto/sha256-js": "^2.0.1",
"@aws-sdk/client-chime-sdk-messaging": "^3.341.0",
@@ -3736,8 +3736,8 @@
"ua-parser-js": "^1.0.1"
},
"engines": {
"node": "^12 || ^14 || ^15 || ^16 || ^18 || ^19",
"npm": "^6 || ^7 || ^8 || ^9"
"node": "^18 || ^19 || ^20",
"npm": "^8 || ^9 || ^10"
}
},
"node_modules/ansi-escapes": {
@@ -3835,12 +3835,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/array-flatten": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-2.1.2.tgz",
"integrity": "sha512-hNfzcOV8W4NdualtqBFPyVO+54DSJuZGY9qT4pRroB6S9e3iiido2ISIC5h9R2sPJ8H3FHCIiEnsv1lPXO3KtQ==",
"dev": true
},
"node_modules/array-includes": {
"version": "3.1.7",
"resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.7.tgz",
@@ -4165,13 +4159,11 @@
"dev": true
},
"node_modules/bonjour-service": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/bonjour-service/-/bonjour-service-1.1.1.tgz",
"integrity": "sha512-Z/5lQRMOG9k7W+FkeGTNjh7htqn/2LMnfOvBZ8pynNZCM9MwkQkI3zeI4oz09uWdcgmgHugVvBqxGg4VQJ5PCg==",
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/bonjour-service/-/bonjour-service-1.2.1.tgz",
"integrity": "sha512-oSzCS2zV14bh2kji6vNe7vrpJYCHGvcZnlffFQ1MEoX/WOeQ/teD8SYWKR942OI3INjq8OMNJlbPK5LLLUxFDw==",
"dev": true,
"dependencies": {
"array-flatten": "^2.1.2",
"dns-equal": "^1.0.0",
"fast-deep-equal": "^3.1.3",
"multicast-dns": "^7.2.5"
}
@@ -4387,16 +4379,10 @@
}
},
"node_modules/chokidar": {
"version": "3.5.3",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz",
"integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==",
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
"integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
"dev": true,
"funding": [
{
"type": "individual",
"url": "https://paulmillr.com/funding/"
}
],
"dependencies": {
"anymatch": "~3.1.2",
"braces": "~3.0.2",
@@ -4409,6 +4395,9 @@
"engines": {
"node": ">= 8.10.0"
},
"funding": {
"url": "https://paulmillr.com/funding/"
},
"optionalDependencies": {
"fsevents": "~2.3.2"
}
@@ -4990,12 +4979,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/dns-equal": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/dns-equal/-/dns-equal-1.0.0.tgz",
"integrity": "sha512-z+paD6YUQsk+AbGCEM4PrOXSss5gd66QfcVBFTKR/HpFL9jCqikS94HYwKww6fQyO7IxrIIyUu+g0Ka9tUS2Cg==",
"dev": true
},
"node_modules/dns-packet": {
"version": "5.6.1",
"resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-5.6.1.tgz",
@@ -5277,16 +5260,16 @@
}
},
"node_modules/eslint": {
"version": "8.55.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-8.55.0.tgz",
"integrity": "sha512-iyUUAM0PCKj5QpwGfmCAG9XXbZCWsqP/eWAWrG/W0umvjuLRBECwSFdt+rCntju0xEH7teIABPwXpahftIaTdA==",
"version": "8.57.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.0.tgz",
"integrity": "sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ==",
"dev": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.6.1",
"@eslint/eslintrc": "^2.1.4",
"@eslint/js": "8.55.0",
"@humanwhocodes/config-array": "^0.11.13",
"@eslint/js": "8.57.0",
"@humanwhocodes/config-array": "^0.11.14",
"@humanwhocodes/module-importer": "^1.0.1",
"@nodelib/fs.walk": "^1.2.8",
"@ungap/structured-clone": "^1.2.0",
@@ -5344,23 +5327,24 @@
}
},
"node_modules/eslint-plugin-prettier": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/eslint-plugin-prettier/-/eslint-plugin-prettier-5.0.1.tgz",
"integrity": "sha512-m3u5RnR56asrwV/lDC4GHorlW75DsFfmUcjfCYylTUs85dBRnB7VM6xG8eCMJdeDRnppzmxZVf1GEPJvl1JmNg==",
"version": "5.1.3",
"resolved": "https://registry.npmjs.org/eslint-plugin-prettier/-/eslint-plugin-prettier-5.1.3.tgz",
"integrity": "sha512-C9GCVAs4Eq7ZC/XFQHITLiHJxQngdtraXaM+LoUFoFp/lHNl2Zn8f3WQbe9HvTBBQ9YnKFB0/2Ajdqwo5D1EAw==",
"dev": true,
"dependencies": {
"prettier-linter-helpers": "^1.0.0",
"synckit": "^0.8.5"
"synckit": "^0.8.6"
},
"engines": {
"node": "^14.18.0 || >=16.0.0"
},
"funding": {
"url": "https://opencollective.com/prettier"
"url": "https://opencollective.com/eslint-plugin-prettier"
},
"peerDependencies": {
"@types/eslint": ">=8.0.0",
"eslint": ">=8.0.0",
"eslint-config-prettier": "*",
"prettier": ">=3.0.0"
},
"peerDependenciesMeta": {
@@ -5997,12 +5981,6 @@
"node": ">= 0.6"
}
},
"node_modules/fs-monkey": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/fs-monkey/-/fs-monkey-1.0.5.tgz",
"integrity": "sha512-8uMbBjrhzW76TYgEV27Y5E//W2f/lTFmx78P2w19FZSxarhI/798APGQyuGCwmkNxgwGRhrLfvWyLBvNtuOmew==",
"dev": true
},
"node_modules/fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@@ -6824,6 +6802,18 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/is-network-error": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/is-network-error/-/is-network-error-1.0.1.tgz",
"integrity": "sha512-OwQXkwBJeESyhFw+OumbJVD58BFBJJI5OM5S1+eyrDKlgDZPX2XNT5gXS56GSD3NPbbwUuMlR1Q71SRp5SobuQ==",
"dev": true,
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-number": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
@@ -8132,17 +8122,27 @@
}
},
"node_modules/memfs": {
"version": "3.5.3",
"resolved": "https://registry.npmjs.org/memfs/-/memfs-3.5.3.tgz",
"integrity": "sha512-UERzLsxzllchadvbPs5aolHh65ISpKpM+ccLbOJ8/vvpBKmAWf+la7dXFy7Mr0ySHbdHrFv5kGFCUHHe6GFEmw==",
"version": "4.7.7",
"resolved": "https://registry.npmjs.org/memfs/-/memfs-4.7.7.tgz",
"integrity": "sha512-x9qc6k88J/VVwnfTkJV8pRRswJ2156Rc4w5rciRqKceFDZ0y1MqsNL9pkg5sE0GOcDzZYbonreALhaHzg1siFw==",
"dev": true,
"dependencies": {
"fs-monkey": "^1.0.4"
"tslib": "^2.0.0"
},
"engines": {
"node": ">= 4.0.0"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/streamich"
}
},
"node_modules/memfs/node_modules/tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==",
"dev": true
},
"node_modules/memorystream": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/memorystream/-/memorystream-0.3.1.tgz",
@@ -8775,16 +8775,20 @@
}
},
"node_modules/p-retry": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
"integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-6.2.0.tgz",
"integrity": "sha512-JA6nkq6hKyWLLasXQXUrO4z8BUZGUt/LjlJxx8Gb2+2ntodU/SS63YZ8b0LUTbQ8ZB9iwOfhEPhg4ykKnn2KsA==",
"dev": true,
"dependencies": {
"@types/retry": "0.12.0",
"@types/retry": "0.12.2",
"is-network-error": "^1.0.0",
"retry": "^0.13.1"
},
"engines": {
"node": ">=8"
"node": ">=16.17"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-try": {
@@ -9038,9 +9042,9 @@
}
},
"node_modules/prettier": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.1.0.tgz",
"integrity": "sha512-TQLvXjq5IAibjh8EpBIkNKxO749UEWABoiIZehEPiY4GNpVdhaFKqSTu+QrlU6D2dPAfubRmtJTi4K4YkQ5eXw==",
"version": "3.2.5",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.2.5.tgz",
"integrity": "sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A==",
"dev": true,
"bin": {
"prettier": "bin/prettier.cjs"
@@ -9127,9 +9131,9 @@
"dev": true
},
"node_modules/protobufjs": {
"version": "7.2.5",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
"integrity": "sha512-gGXRSXvxQ7UiPgfw8gevrfRWcTlSbOFg+p/N+JVJEK5VhueL2miT6qTymqAmjr1Q5WbOCyJbyrk6JfWKwlFn6A==",
"version": "7.2.6",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.6.tgz",
"integrity": "sha512-dgJaEDDL6x8ASUZ1YqWciTRrdOuYNzoOf27oHNfdyvKqHr5i0FV7FSLU+aIeFjyFgVxrpTOtQUi0BLLBymZaBw==",
"hasInstallScript": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
@@ -9830,9 +9834,9 @@
"dev": true
},
"node_modules/serialize-javascript": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.1.tgz",
"integrity": "sha512-owoXEFjWRllis8/M1Q+Cw5k8ZH40e3zhp/ovX+Xr/vi1qj6QesbyXXViFbpNvWvPNAD62SutwEXavefrLJWj7w==",
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz",
"integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==",
"dev": true,
"dependencies": {
"randombytes": "^2.1.0"
@@ -10044,9 +10048,9 @@
}
},
"node_modules/socket.io-client": {
"version": "4.7.2",
"resolved": "https://registry.npmjs.org/socket.io-client/-/socket.io-client-4.7.2.tgz",
"integrity": "sha512-vtA0uD4ibrYD793SOIAwlo8cj6haOeMHrGvwPxJsxH7CeIksqJ+3Zc06RvWTIFgiSqx4A3sOnTXpfAEE2Zyz6w==",
"version": "4.7.4",
"resolved": "https://registry.npmjs.org/socket.io-client/-/socket.io-client-4.7.4.tgz",
"integrity": "sha512-wh+OkeF0rAVCrABWQBaEjLfb7DVPotMbu0cgWgyR0v6eA4EoVnAwcIeIbcdTE3GT/H3kbdLl7OoH2+asoDRIIg==",
"dependencies": {
"@socket.io/component-emitter": "~3.1.0",
"debug": "~4.3.2",
@@ -10466,9 +10470,9 @@
}
},
"node_modules/terser": {
"version": "5.24.0",
"resolved": "https://registry.npmjs.org/terser/-/terser-5.24.0.tgz",
"integrity": "sha512-ZpGR4Hy3+wBEzVEnHvstMvqpD/nABNelQn/z2r0fjVWGQsN3bpOLzQlqDxmb4CDZnXq5lpjnQ+mHQLAOpfM5iw==",
"version": "5.28.1",
"resolved": "https://registry.npmjs.org/terser/-/terser-5.28.1.tgz",
"integrity": "sha512-wM+bZp54v/E9eRRGXb5ZFDvinrJIOaTapx3WUokyVGZu5ucVCK55zEgGd5Dl2fSr3jUo5sDiERErUWLY6QPFyA==",
"dev": true,
"dependencies": {
"@jridgewell/source-map": "^0.3.3",
@@ -10484,16 +10488,16 @@
}
},
"node_modules/terser-webpack-plugin": {
"version": "5.3.9",
"resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.9.tgz",
"integrity": "sha512-ZuXsqE07EcggTWQjXUj+Aot/OMcD0bMKGgF63f7UxYcu5/AJF53aIpK1YoP5xR9l6s/Hy2b+t1AM0bLNPRuhwA==",
"version": "5.3.10",
"resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.10.tgz",
"integrity": "sha512-BKFPWlPDndPs+NGGCr1U59t0XScL5317Y0UReNrHaw9/FwhPENlq6bfgs+4yPfyP51vqC1bQ4rp1EfXW5ZSH9w==",
"dev": true,
"dependencies": {
"@jridgewell/trace-mapping": "^0.3.17",
"@jridgewell/trace-mapping": "^0.3.20",
"jest-worker": "^27.4.5",
"schema-utils": "^3.1.1",
"serialize-javascript": "^6.0.1",
"terser": "^5.16.8"
"terser": "^5.26.0"
},
"engines": {
"node": ">= 10.13.0"
@@ -10827,9 +10831,9 @@
}
},
"node_modules/typescript": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.2.tgz",
"integrity": "sha512-6l+RyNy7oAHDfxC4FzSJcz9vnjTKxrLpDG5M2Vu4SHRVNg6xzqZp6LYSR9zjqQTu8DU/f5xwxUdADOkbrIX2gQ==",
"version": "5.3.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.3.tgz",
"integrity": "sha512-pXWcraxM0uxAS+tN0AG/BF2TyqmHO014Z070UsJ+pFvYuRSq8KH8DmWpnbXe0pEPDHXZV3FcAbJkijJ5oNEnWw==",
"dev": true,
"bin": {
"tsc": "bin/tsc",
@@ -11026,19 +11030,19 @@
}
},
"node_modules/webpack": {
"version": "5.89.0",
"resolved": "https://registry.npmjs.org/webpack/-/webpack-5.89.0.tgz",
"integrity": "sha512-qyfIC10pOr70V+jkmud8tMfajraGCZMBWJtrmuBymQKCrLTRejBI8STDp1MCyZu/QTdZSeacCQYpYNQVOzX5kw==",
"version": "5.90.3",
"resolved": "https://registry.npmjs.org/webpack/-/webpack-5.90.3.tgz",
"integrity": "sha512-h6uDYlWCctQRuXBs1oYpVe6sFcWedl0dpcVaTf/YF67J9bKvwJajFulMVSYKHrksMB3I/pIagRzDxwxkebuzKA==",
"dev": true,
"dependencies": {
"@types/eslint-scope": "^3.7.3",
"@types/estree": "^1.0.0",
"@types/estree": "^1.0.5",
"@webassemblyjs/ast": "^1.11.5",
"@webassemblyjs/wasm-edit": "^1.11.5",
"@webassemblyjs/wasm-parser": "^1.11.5",
"acorn": "^8.7.1",
"acorn-import-assertions": "^1.9.0",
"browserslist": "^4.14.5",
"browserslist": "^4.21.10",
"chrome-trace-event": "^1.0.2",
"enhanced-resolve": "^5.15.0",
"es-module-lexer": "^1.2.1",
@@ -11052,7 +11056,7 @@
"neo-async": "^2.6.2",
"schema-utils": "^3.2.0",
"tapable": "^2.1.1",
"terser-webpack-plugin": "^5.3.7",
"terser-webpack-plugin": "^5.3.10",
"watchpack": "^2.4.0",
"webpack-sources": "^3.2.3"
},
@@ -11127,77 +11131,82 @@
}
},
"node_modules/webpack-dev-middleware": {
"version": "5.3.3",
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz",
"integrity": "sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==",
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-7.0.0.tgz",
"integrity": "sha512-tZ5hqsWwww/8DislmrzXE3x+4f+v10H1z57mA2dWFrILb4i3xX+dPhTkcdR0DLyQztrhF2AUmO5nN085UYjd/Q==",
"dev": true,
"dependencies": {
"colorette": "^2.0.10",
"memfs": "^3.4.3",
"memfs": "^4.6.0",
"mime-types": "^2.1.31",
"range-parser": "^1.2.1",
"schema-utils": "^4.0.0"
},
"engines": {
"node": ">= 12.13.0"
"node": ">= 18.12.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/webpack"
},
"peerDependencies": {
"webpack": "^4.0.0 || ^5.0.0"
"webpack": "^5.0.0"
},
"peerDependenciesMeta": {
"webpack": {
"optional": true
}
}
},
"node_modules/webpack-dev-server": {
"version": "4.15.1",
"resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-4.15.1.tgz",
"integrity": "sha512-5hbAst3h3C3L8w6W4P96L5vaV0PxSmJhxZvWKYIdgxOQm8pNZ5dEOmmSLBVpP85ReeyRt6AS1QJNyo/oFFPeVA==",
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-5.0.2.tgz",
"integrity": "sha512-IVj3qsQhiLJR82zVg3QdPtngMD05CYP/Am+9NG5QSl+XwUR/UPtFwllRBKrMwM9ttzFsC6Zj3DMgniPyn/Z0hQ==",
"dev": true,
"dependencies": {
"@types/bonjour": "^3.5.9",
"@types/connect-history-api-fallback": "^1.3.5",
"@types/express": "^4.17.13",
"@types/serve-index": "^1.9.1",
"@types/serve-static": "^1.13.10",
"@types/sockjs": "^0.3.33",
"@types/ws": "^8.5.5",
"@types/bonjour": "^3.5.13",
"@types/connect-history-api-fallback": "^1.5.4",
"@types/express": "^4.17.21",
"@types/serve-index": "^1.9.4",
"@types/serve-static": "^1.15.5",
"@types/sockjs": "^0.3.36",
"@types/ws": "^8.5.10",
"ansi-html-community": "^0.0.8",
"bonjour-service": "^1.0.11",
"chokidar": "^3.5.3",
"bonjour-service": "^1.2.1",
"chokidar": "^3.6.0",
"colorette": "^2.0.10",
"compression": "^1.7.4",
"connect-history-api-fallback": "^2.0.0",
"default-gateway": "^6.0.3",
"express": "^4.17.3",
"graceful-fs": "^4.2.6",
"html-entities": "^2.3.2",
"html-entities": "^2.4.0",
"http-proxy-middleware": "^2.0.3",
"ipaddr.js": "^2.0.1",
"launch-editor": "^2.6.0",
"open": "^8.0.9",
"p-retry": "^4.5.0",
"rimraf": "^3.0.2",
"schema-utils": "^4.0.0",
"selfsigned": "^2.1.1",
"ipaddr.js": "^2.1.0",
"launch-editor": "^2.6.1",
"open": "^10.0.3",
"p-retry": "^6.2.0",
"rimraf": "^5.0.5",
"schema-utils": "^4.2.0",
"selfsigned": "^2.4.1",
"serve-index": "^1.9.1",
"sockjs": "^0.3.24",
"spdy": "^4.0.2",
"webpack-dev-middleware": "^5.3.1",
"ws": "^8.13.0"
"webpack-dev-middleware": "^7.0.0",
"ws": "^8.16.0"
},
"bin": {
"webpack-dev-server": "bin/webpack-dev-server.js"
},
"engines": {
"node": ">= 12.13.0"
"node": ">= 18.12.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/webpack"
},
"peerDependencies": {
"webpack": "^4.37.0 || ^5.0.0"
"webpack": "^5.0.0"
},
"peerDependenciesMeta": {
"webpack": {
@@ -11208,66 +11217,98 @@
}
}
},
"node_modules/webpack-dev-server/node_modules/define-lazy-prop": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz",
"integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==",
"node_modules/webpack-dev-server/node_modules/bundle-name": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bundle-name/-/bundle-name-4.1.0.tgz",
"integrity": "sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/webpack-dev-server/node_modules/is-docker": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz",
"integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==",
"dev": true,
"bin": {
"is-docker": "cli.js"
"dependencies": {
"run-applescript": "^7.0.0"
},
"engines": {
"node": ">=8"
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/default-browser": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.2.1.tgz",
"integrity": "sha512-WY/3TUME0x3KPYdRRxEJJvXRHV4PyPoUsxtZa78lwItwRQRHhd2U9xOscaT/YTf8uCXIAjeJOFBVEh/7FtD8Xg==",
"dev": true,
"dependencies": {
"bundle-name": "^4.1.0",
"default-browser-id": "^5.0.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/default-browser-id": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/default-browser-id/-/default-browser-id-5.0.0.tgz",
"integrity": "sha512-A6p/pu/6fyBcA1TRz/GqWYPViplrftcW2gZC9q79ngNCKAeR/X3gcEdXQHl4KNXV+3wgIJ1CPkJQ3IHM6lcsyA==",
"dev": true,
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/is-wsl": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-3.1.0.tgz",
"integrity": "sha512-UcVfVfaK4Sc4m7X3dUSoHoozQGBEFeDC+zVo06t98xe8CzHSZZBekNXH+tu0NalHolcJ/QAGqS46Hef7QXBIMw==",
"dev": true,
"dependencies": {
"is-inside-container": "^1.0.0"
},
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/open": {
"version": "8.4.2",
"resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz",
"integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==",
"version": "10.0.4",
"resolved": "https://registry.npmjs.org/open/-/open-10.0.4.tgz",
"integrity": "sha512-oujJ/FFr7ra6/7gJuQ4ZJJ8Gf2VHM0J3J/W7IvH++zaqEzacWVxzK++NiVY5NLHTTj7u/jNH5H3Ei9biL31Lng==",
"dev": true,
"dependencies": {
"define-lazy-prop": "^2.0.0",
"is-docker": "^2.1.1",
"is-wsl": "^2.2.0"
"default-browser": "^5.2.1",
"define-lazy-prop": "^3.0.0",
"is-inside-container": "^1.0.0",
"is-wsl": "^3.1.0"
},
"engines": {
"node": ">=12"
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/rimraf": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
"integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==",
"node_modules/webpack-dev-server/node_modules/run-applescript": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-7.0.0.tgz",
"integrity": "sha512-9by4Ij99JUr/MCFBUkDKLWK3G9HVXmabKz9U5MlIAIuvuzkiOicRYs8XJLxX+xahD+mLiiCYDqF9dKAgtzKP1A==",
"dev": true,
"dependencies": {
"glob": "^7.1.3"
},
"bin": {
"rimraf": "bin.js"
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/ws": {
"version": "8.14.2",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz",
"integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==",
"version": "8.16.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz",
"integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==",
"dev": true,
"engines": {
"node": ">=10.0.0"

View File

@@ -1,6 +1,6 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.180",
"version": "1.0.182",
"description": "",
"main": "dist/index.js",
"directories": {
@@ -26,35 +26,35 @@
"author": "wataru.okada@flect.co.jp",
"license": "ISC",
"devDependencies": {
"@types/audioworklet": "^0.0.52",
"@types/jest": "^29.5.10",
"@types/node": "^20.10.2",
"@types/react": "18.2.40",
"@types/react-dom": "18.2.17",
"eslint": "^8.55.0",
"@types/audioworklet": "^0.0.54",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.21",
"@types/react": "18.2.60",
"@types/react-dom": "18.2.19",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.0.1",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"jest": "^29.7.0",
"npm-run-all": "^4.1.5",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"raw-loader": "^4.0.2",
"rimraf": "^5.0.5",
"ts-loader": "^9.5.1",
"typescript": "^5.3.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
},
"dependencies": {
"@types/readable-stream": "^4.0.9",
"amazon-chime-sdk-js": "^3.18.2",
"@types/readable-stream": "^4.0.10",
"amazon-chime-sdk-js": "^3.20.0",
"buffer": "^6.0.3",
"localforage": "^1.10.0",
"protobufjs": "^7.2.5",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"socket.io-client": "^4.7.2"
"socket.io-client": "^4.7.4"
}
}
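
Several of these bumps are majors (copy-webpack-plugin 11 to 12, html-loader 4 to 5, postcss-loader 7 to 8, webpack-dev-server 4 to 5). The dev-server jump is the most invasive: the lock file earlier in this commit shows it now requires Node >= 18.12.0 and accepts only webpack 5 as a peer, and in v5 the devServer proxy option takes an array of objects instead of an object map. A hedged config sketch follows; the target and port are illustrative, not taken from this repository.

```ts
// webpack.config.ts (sketch): webpack-dev-server v5 expects `proxy` as an array.
import type { Configuration } from "webpack";
import "webpack-dev-server"; // type augmentation: adds `devServer` to Configuration

const config: Configuration = {
    mode: "development",
    devServer: {
        proxy: [{ context: ["/api"], target: "http://localhost:18888" }],
    },
};

export default config;
```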

View File

@@ -4,20 +4,22 @@
// 187.5chunk -> 1sec
export const VoiceChangerType = {
MMVCv15: "MMVCv15",
MMVCv13: "MMVCv13",
"so-vits-svc-40": "so-vits-svc-40",
"DDSP-SVC": "DDSP-SVC",
RVC: "RVC",
"Diffusion-SVC": "Diffusion-SVC",
Beatrice: "Beatrice",
LLVC: "LLVC",
WebModel: "WebModel",
MMVCv15: "MMVCv15",
MMVCv13: "MMVCv13",
"so-vits-svc-40": "so-vits-svc-40",
"DDSP-SVC": "DDSP-SVC",
RVC: "RVC",
"Diffusion-SVC": "Diffusion-SVC",
Beatrice: "Beatrice",
LLVC: "LLVC",
WebModel: "WebModel",
EasyVC: "EasyVC",
} as const;
export type VoiceChangerType = (typeof VoiceChangerType)[keyof typeof VoiceChangerType];
export type VoiceChangerType =
(typeof VoiceChangerType)[keyof typeof VoiceChangerType];
export const StaticModel = {
BeatriceJVS: "Beatrice-JVS",
BeatriceJVS: "Beatrice-JVS",
} as const;
export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel];
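
VoiceChangerType and StaticModel use the `as const` object pattern, so adding the EasyVC: "EasyVC" entry automatically widens the derived union type; callers get the new literal with no further changes. A standalone illustration of the pattern (not code from this file):

```ts
// The `as const` object doubles as a runtime enum and as the source of a union type.
const Fruit = {
    Apple: "Apple",
    Banana: "Banana",
} as const;
type Fruit = (typeof Fruit)[keyof typeof Fruit]; // "Apple" | "Banana"

const pick = (f: Fruit) => console.log(f);
pick(Fruit.Apple); // OK
// pick("Cherry"); // compile error: "Cherry" is not assignable to Fruit
```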
@@ -25,426 +27,439 @@ export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel];
// Server settings
///////////////////////
export const InputSampleRate = {
"48000": 48000,
"44100": 44100,
"24000": 24000,
"48000": 48000,
"44100": 44100,
"24000": 24000,
} as const;
export type InputSampleRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
export type InputSampleRate =
(typeof InputSampleRate)[keyof typeof InputSampleRate];
export const ModelSamplingRate = {
"48000": 48000,
"40000": 40000,
"32000": 32000,
"48000": 48000,
"40000": 40000,
"32000": 32000,
} as const;
export type ModelSamplingRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
export type ModelSamplingRate =
(typeof InputSampleRate)[keyof typeof InputSampleRate];
export const CrossFadeOverlapSize = {
"128": 128,
"256": 256,
"512": 512,
"1024": 1024,
"2048": 2048,
"4096": 4096,
"128": 128,
"256": 256,
"512": 512,
"1024": 1024,
"2048": 2048,
"4096": 4096,
} as const;
export type CrossFadeOverlapSize = (typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
export type CrossFadeOverlapSize =
(typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
export const F0Detector = {
dio: "dio",
harvest: "harvest",
crepe: "crepe",
crepe_full: "crepe_full",
crepe_tiny: "crepe_tiny",
rmvpe: "rmvpe",
rmvpe_onnx: "rmvpe_onnx",
fcpe: "fcpe",
dio: "dio",
harvest: "harvest",
crepe: "crepe",
crepe_full: "crepe_full",
crepe_tiny: "crepe_tiny",
rmvpe: "rmvpe",
rmvpe_onnx: "rmvpe_onnx",
fcpe: "fcpe",
} as const;
export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector];
export const DiffMethod = {
pndm: "pndm",
"dpm-solver": "dpm-solver",
pndm: "pndm",
"dpm-solver": "dpm-solver",
} as const;
export type DiffMethod = (typeof DiffMethod)[keyof typeof DiffMethod];
export const RVCModelType = {
pyTorchRVC: "pyTorchRVC",
pyTorchRVCNono: "pyTorchRVCNono",
pyTorchRVCv2: "pyTorchRVCv2",
pyTorchRVCv2Nono: "pyTorchRVCv2Nono",
pyTorchWebUI: "pyTorchWebUI",
pyTorchWebUINono: "pyTorchWebUINono",
onnxRVC: "onnxRVC",
onnxRVCNono: "onnxRVCNono",
pyTorchRVC: "pyTorchRVC",
pyTorchRVCNono: "pyTorchRVCNono",
pyTorchRVCv2: "pyTorchRVCv2",
pyTorchRVCv2Nono: "pyTorchRVCv2Nono",
pyTorchWebUI: "pyTorchWebUI",
pyTorchWebUINono: "pyTorchWebUINono",
onnxRVC: "onnxRVC",
onnxRVCNono: "onnxRVCNono",
} as const;
export type RVCModelType = (typeof RVCModelType)[keyof typeof RVCModelType];
export const ServerSettingKey = {
passThrough: "passThrough",
srcId: "srcId",
dstId: "dstId",
gpu: "gpu",
passThrough: "passThrough",
srcId: "srcId",
dstId: "dstId",
gpu: "gpu",
crossFadeOffsetRate: "crossFadeOffsetRate",
crossFadeEndRate: "crossFadeEndRate",
crossFadeOverlapSize: "crossFadeOverlapSize",
crossFadeOffsetRate: "crossFadeOffsetRate",
crossFadeEndRate: "crossFadeEndRate",
crossFadeOverlapSize: "crossFadeOverlapSize",
framework: "framework",
onnxExecutionProvider: "onnxExecutionProvider",
framework: "framework",
onnxExecutionProvider: "onnxExecutionProvider",
f0Factor: "f0Factor",
f0Detector: "f0Detector",
recordIO: "recordIO",
f0Factor: "f0Factor",
f0Detector: "f0Detector",
recordIO: "recordIO",
enableServerAudio: "enableServerAudio",
serverAudioStated: "serverAudioStated",
serverAudioSampleRate: "serverAudioSampleRate",
serverInputAudioSampleRate: "serverInputAudioSampleRate",
serverOutputAudioSampleRate: "serverOutputAudioSampleRate",
serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate",
serverInputAudioBufferSize: "serverInputAudioBufferSize",
serverOutputAudioBufferSize: "serverOutputAudioBufferSize",
serverInputDeviceId: "serverInputDeviceId",
serverOutputDeviceId: "serverOutputDeviceId",
serverMonitorDeviceId: "serverMonitorDeviceId",
serverReadChunkSize: "serverReadChunkSize",
serverInputAudioGain: "serverInputAudioGain",
serverOutputAudioGain: "serverOutputAudioGain",
serverMonitorAudioGain: "serverMonitorAudioGain",
enableServerAudio: "enableServerAudio",
serverAudioStated: "serverAudioStated",
serverAudioSampleRate: "serverAudioSampleRate",
serverInputAudioSampleRate: "serverInputAudioSampleRate",
serverOutputAudioSampleRate: "serverOutputAudioSampleRate",
serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate",
serverInputAudioBufferSize: "serverInputAudioBufferSize",
serverOutputAudioBufferSize: "serverOutputAudioBufferSize",
serverInputDeviceId: "serverInputDeviceId",
serverOutputDeviceId: "serverOutputDeviceId",
serverMonitorDeviceId: "serverMonitorDeviceId",
serverReadChunkSize: "serverReadChunkSize",
serverInputAudioGain: "serverInputAudioGain",
serverOutputAudioGain: "serverOutputAudioGain",
serverMonitorAudioGain: "serverMonitorAudioGain",
tran: "tran",
noiseScale: "noiseScale",
predictF0: "predictF0",
silentThreshold: "silentThreshold",
extraConvertSize: "extraConvertSize",
clusterInferRatio: "clusterInferRatio",
tran: "tran",
noiseScale: "noiseScale",
predictF0: "predictF0",
silentThreshold: "silentThreshold",
extraConvertSize: "extraConvertSize",
clusterInferRatio: "clusterInferRatio",
indexRatio: "indexRatio",
protect: "protect",
rvcQuality: "rvcQuality",
modelSamplingRate: "modelSamplingRate",
silenceFront: "silenceFront",
modelSlotIndex: "modelSlotIndex",
indexRatio: "indexRatio",
protect: "protect",
rvcQuality: "rvcQuality",
modelSamplingRate: "modelSamplingRate",
silenceFront: "silenceFront",
modelSlotIndex: "modelSlotIndex",
useEnhancer: "useEnhancer",
useDiff: "useDiff",
// "useDiffDpm": "useDiffDpm",
diffMethod: "diffMethod",
useDiffSilence: "useDiffSilence",
diffAcc: "diffAcc",
diffSpkId: "diffSpkId",
kStep: "kStep",
threshold: "threshold",
useEnhancer: "useEnhancer",
useDiff: "useDiff",
// "useDiffDpm": "useDiffDpm",
diffMethod: "diffMethod",
useDiffSilence: "useDiffSilence",
diffAcc: "diffAcc",
diffSpkId: "diffSpkId",
kStep: "kStep",
threshold: "threshold",
speedUp: "speedUp",
skipDiffusion: "skipDiffusion",
speedUp: "speedUp",
skipDiffusion: "skipDiffusion",
inputSampleRate: "inputSampleRate",
enableDirectML: "enableDirectML",
inputSampleRate: "inputSampleRate",
enableDirectML: "enableDirectML",
} as const;
export type ServerSettingKey = (typeof ServerSettingKey)[keyof typeof ServerSettingKey];
export type ServerSettingKey =
(typeof ServerSettingKey)[keyof typeof ServerSettingKey];
export type VoiceChangerServerSetting = {
passThrough: boolean;
srcId: number;
dstId: number;
gpu: number;
passThrough: boolean;
srcId: number;
dstId: number;
gpu: number;
crossFadeOffsetRate: number;
crossFadeEndRate: number;
crossFadeOverlapSize: CrossFadeOverlapSize;
crossFadeOffsetRate: number;
crossFadeEndRate: number;
crossFadeOverlapSize: CrossFadeOverlapSize;
f0Factor: number;
f0Detector: F0Detector; // dio or harvest
recordIO: number; // 0:off, 1:on
f0Factor: number;
f0Detector: F0Detector; // dio or harvest
recordIO: number; // 0:off, 1:on
enableServerAudio: number; // 0:off, 1:on
serverAudioStated: number; // 0:off, 1:on
serverAudioSampleRate: number;
serverInputAudioSampleRate: number;
serverOutputAudioSampleRate: number;
serverMonitorAudioSampleRate: number;
serverInputAudioBufferSize: number;
serverOutputAudioBufferSize: number;
serverInputDeviceId: number;
serverOutputDeviceId: number;
serverMonitorDeviceId: number;
serverReadChunkSize: number;
serverInputAudioGain: number;
serverOutputAudioGain: number;
serverMonitorAudioGain: number;
enableServerAudio: number; // 0:off, 1:on
serverAudioStated: number; // 0:off, 1:on
serverAudioSampleRate: number;
serverInputAudioSampleRate: number;
serverOutputAudioSampleRate: number;
serverMonitorAudioSampleRate: number;
serverInputAudioBufferSize: number;
serverOutputAudioBufferSize: number;
serverInputDeviceId: number;
serverOutputDeviceId: number;
serverMonitorDeviceId: number;
serverReadChunkSize: number;
serverInputAudioGain: number;
serverOutputAudioGain: number;
serverMonitorAudioGain: number;
tran: number; // so-vits-svc
noiseScale: number; // so-vits-svc
predictF0: number; // so-vits-svc
silentThreshold: number; // so-vits-svc
extraConvertSize: number; // so-vits-svc
clusterInferRatio: number; // so-vits-svc
tran: number; // so-vits-svc
noiseScale: number; // so-vits-svc
predictF0: number; // so-vits-svc
silentThreshold: number; // so-vits-svc
extraConvertSize: number; // so-vits-svc
clusterInferRatio: number; // so-vits-svc
indexRatio: number; // RVC
protect: number; // RVC
rvcQuality: number; // 0:low, 1:high
silenceFront: number; // 0:off, 1:on
modelSamplingRate: ModelSamplingRate; // 32000,40000,48000
modelSlotIndex: number | StaticModel;
indexRatio: number; // RVC
protect: number; // RVC
rvcQuality: number; // 0:low, 1:high
silenceFront: number; // 0:off, 1:on
modelSamplingRate: ModelSamplingRate; // 32000,40000,48000
modelSlotIndex: number | StaticModel;
useEnhancer: number; // DDSP-SVC
useDiff: number; // DDSP-SVC
// useDiffDpm: number// DDSP-SVC
diffMethod: DiffMethod; // DDSP-SVC
useDiffSilence: number; // DDSP-SVC
diffAcc: number; // DDSP-SVC
diffSpkId: number; // DDSP-SVC
kStep: number; // DDSP-SVC
threshold: number; // DDSP-SVC
useEnhancer: number; // DDSP-SVC
useDiff: number; // DDSP-SVC
// useDiffDpm: number// DDSP-SVC
diffMethod: DiffMethod; // DDSP-SVC
useDiffSilence: number; // DDSP-SVC
diffAcc: number; // DDSP-SVC
diffSpkId: number; // DDSP-SVC
kStep: number; // DDSP-SVC
threshold: number; // DDSP-SVC
speedUp: number; // Diffusion-SVC
skipDiffusion: number; // Diffusion-SVC 0:off, 1:on
speedUp: number; // Diffusion-SVC
skipDiffusion: number; // Diffusion-SVC 0:off, 1:on
inputSampleRate: InputSampleRate;
enableDirectML: number;
inputSampleRate: InputSampleRate;
enableDirectML: number;
};
type ModelSlot = {
slotIndex: number | StaticModel;
voiceChangerType: VoiceChangerType;
name: string;
description: string;
credit: string;
termsOfUseUrl: string;
iconFile: string;
speakers: { [key: number]: string };
slotIndex: number | StaticModel;
voiceChangerType: VoiceChangerType;
name: string;
description: string;
credit: string;
termsOfUseUrl: string;
iconFile: string;
speakers: { [key: number]: string };
};
export type RVCModelSlot = ModelSlot & {
modelFile: string;
indexFile: string;
defaultIndexRatio: number;
defaultProtect: number;
defaultTune: number;
modelType: RVCModelType;
modelFile: string;
indexFile: string;
defaultIndexRatio: number;
defaultProtect: number;
defaultTune: number;
modelType: RVCModelType;
embChannels: number;
f0: boolean;
samplingRate: number;
deprecated: boolean;
embChannels: number;
f0: boolean;
samplingRate: number;
deprecated: boolean;
};
export type MMVCv13ModelSlot = ModelSlot & {
modelFile: string;
configFile: string;
srcId: number;
dstId: number;
modelFile: string;
configFile: string;
srcId: number;
dstId: number;
samplingRate: number;
speakers: { [key: number]: string };
samplingRate: number;
speakers: { [key: number]: string };
};
export type MMVCv15ModelSlot = ModelSlot & {
modelFile: string;
configFile: string;
srcId: number;
dstId: number;
f0Factor: number;
samplingRate: number;
f0: { [key: number]: number };
modelFile: string;
configFile: string;
srcId: number;
dstId: number;
f0Factor: number;
samplingRate: number;
f0: { [key: number]: number };
};
export type SoVitsSvc40ModelSlot = ModelSlot & {
modelFile: string;
configFile: string;
clusterFile: string;
dstId: number;
modelFile: string;
configFile: string;
clusterFile: string;
dstId: number;
samplingRate: number;
samplingRate: number;
defaultTune: number;
defaultClusterInferRatio: number;
noiseScale: number;
speakers: { [key: number]: string };
defaultTune: number;
defaultClusterInferRatio: number;
noiseScale: number;
speakers: { [key: number]: string };
};
export type DDSPSVCModelSlot = ModelSlot & {
modelFile: string;
configFile: string;
diffModelFile: string;
diffConfigFile: string;
dstId: number;
modelFile: string;
configFile: string;
diffModelFile: string;
diffConfigFile: string;
dstId: number;
samplingRate: number;
samplingRate: number;
defaultTune: number;
enhancer: boolean;
diffusion: boolean;
acc: number;
kstep: number;
speakers: { [key: number]: string };
defaultTune: number;
enhancer: boolean;
diffusion: boolean;
acc: number;
kstep: number;
speakers: { [key: number]: string };
};
export type DiffusionSVCModelSlot = ModelSlot & {
modelFile: string;
dstId: number;
modelFile: string;
dstId: number;
samplingRate: number;
samplingRate: number;
defaultTune: number;
defaultKstep: number;
defaultSpeedup: number;
kStepMax: number;
nLayers: number;
nnLayers: number;
speakers: { [key: number]: string };
defaultTune: number;
defaultKstep: number;
defaultSpeedup: number;
kStepMax: number;
nLayers: number;
nnLayers: number;
speakers: { [key: number]: string };
};
export type BeatriceModelSlot = ModelSlot & {
modelFile: string;
dstId: number;
modelFile: string;
dstId: number;
speakers: { [key: number]: string };
speakers: { [key: number]: string };
};
export type LLVCModelSlot = ModelSlot & {
modelFile: string;
configFile: string;
modelFile: string;
configFile: string;
speakers: { [key: number]: string };
speakers: { [key: number]: string };
};
export type WebModelSlot = ModelSlot & {
modelFile: string;
defaultTune: number;
modelType: RVCModelType;
f0: boolean;
samplingRate: number;
modelFile: string;
defaultTune: number;
modelType: RVCModelType;
f0: boolean;
samplingRate: number;
};
export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot | DiffusionSVCModelSlot | BeatriceModelSlot | LLVCModelSlot | WebModelSlot;
export type ModelSlotUnion =
| RVCModelSlot
| MMVCv13ModelSlot
| MMVCv15ModelSlot
| SoVitsSvc40ModelSlot
| DDSPSVCModelSlot
| DiffusionSVCModelSlot
| BeatriceModelSlot
| LLVCModelSlot
| WebModelSlot;
type ServerAudioDevice = {
kind: "audioinput" | "audiooutput";
index: number;
name: string;
hostAPI: string;
kind: "audioinput" | "audiooutput";
index: number;
name: string;
hostAPI: string;
};
export type ServerInfo = VoiceChangerServerSetting & {
// Not part of the config (can only be read via getInfo)
status: string;
modelSlots: ModelSlotUnion[];
serverAudioInputDevices: ServerAudioDevice[];
serverAudioOutputDevices: ServerAudioDevice[];
sampleModels: (RVCSampleModel | DiffusionSVCSampleModel)[];
gpus: {
id: number;
name: string;
memory: number;
}[];
maxInputLength: number; // MMVCv15
voiceChangerParams: {
model_dir: string;
};
// Not part of the config (can only be read via getInfo)
status: string;
modelSlots: ModelSlotUnion[];
serverAudioInputDevices: ServerAudioDevice[];
serverAudioOutputDevices: ServerAudioDevice[];
sampleModels: (RVCSampleModel | DiffusionSVCSampleModel)[];
gpus: {
id: number;
name: string;
memory: number;
}[];
maxInputLength: number; // MMVCv15
voiceChangerParams: {
model_dir: string;
};
};
export type SampleModel = {
id: string;
voiceChangerType: VoiceChangerType;
lang: string;
tag: string[];
name: string;
modelUrl: string;
termsOfUseUrl: string;
icon: string;
credit: string;
description: string;
sampleRate: number;
modelType: string;
f0: boolean;
id: string;
voiceChangerType: VoiceChangerType;
lang: string;
tag: string[];
name: string;
modelUrl: string;
termsOfUseUrl: string;
icon: string;
credit: string;
description: string;
sampleRate: number;
modelType: string;
f0: boolean;
};
export type RVCSampleModel = SampleModel & {
indexUrl: string;
featureUrl: string;
indexUrl: string;
featureUrl: string;
};
export type DiffusionSVCSampleModel = SampleModel & {
numOfDiffLayers: number;
numOfNativeLayers: number;
maxKStep: number;
numOfDiffLayers: number;
numOfNativeLayers: number;
maxKStep: number;
};
export const DefaultServerSetting: ServerInfo = {
// VC Common
passThrough: false,
inputSampleRate: 48000,
// VC Common
passThrough: false,
inputSampleRate: 48000,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
recordIO: 0,
recordIO: 0,
enableServerAudio: 0,
serverAudioStated: 0,
serverAudioSampleRate: 48000,
serverInputAudioSampleRate: 48000,
serverOutputAudioSampleRate: 48000,
serverMonitorAudioSampleRate: 48000,
serverInputAudioBufferSize: 1024 * 24,
serverOutputAudioBufferSize: 1024 * 24,
serverInputDeviceId: -1,
serverOutputDeviceId: -1,
serverMonitorDeviceId: -1,
serverReadChunkSize: 256,
serverInputAudioGain: 1.0,
serverOutputAudioGain: 1.0,
serverMonitorAudioGain: 1.0,
enableServerAudio: 0,
serverAudioStated: 0,
serverAudioSampleRate: 48000,
serverInputAudioSampleRate: 48000,
serverOutputAudioSampleRate: 48000,
serverMonitorAudioSampleRate: 48000,
serverInputAudioBufferSize: 1024 * 24,
serverOutputAudioBufferSize: 1024 * 24,
serverInputDeviceId: -1,
serverOutputDeviceId: -1,
serverMonitorDeviceId: -1,
serverReadChunkSize: 256,
serverInputAudioGain: 1.0,
serverOutputAudioGain: 1.0,
serverMonitorAudioGain: 1.0,
// VC Specific
srcId: 0,
dstId: 1,
gpu: 0,
// VC Specific
srcId: 0,
dstId: 1,
gpu: 0,
f0Factor: 1.0,
f0Detector: F0Detector.rmvpe_onnx,
f0Factor: 1.0,
f0Detector: F0Detector.rmvpe_onnx,
tran: 0,
noiseScale: 0,
predictF0: 0,
silentThreshold: 0,
extraConvertSize: 0,
clusterInferRatio: 0,
tran: 0,
noiseScale: 0,
predictF0: 0,
silentThreshold: 0,
extraConvertSize: 0,
clusterInferRatio: 0,
indexRatio: 0,
protect: 0.5,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 1,
modelSlotIndex: 0,
sampleModels: [],
gpus: [],
indexRatio: 0,
protect: 0.5,
rvcQuality: 0,
modelSamplingRate: 48000,
silenceFront: 1,
modelSlotIndex: 0,
sampleModels: [],
gpus: [],
useEnhancer: 0,
useDiff: 1,
diffMethod: "dpm-solver",
useDiffSilence: 0,
diffAcc: 20,
diffSpkId: 1,
kStep: 120,
threshold: -45,
useEnhancer: 0,
useDiff: 1,
diffMethod: "dpm-solver",
useDiffSilence: 0,
diffAcc: 20,
diffSpkId: 1,
kStep: 120,
threshold: -45,
speedUp: 10,
skipDiffusion: 1,
speedUp: 10,
skipDiffusion: 1,
enableDirectML: 0,
//
status: "ok",
modelSlots: [],
serverAudioInputDevices: [],
serverAudioOutputDevices: [],
enableDirectML: 0,
//
status: "ok",
modelSlots: [],
serverAudioInputDevices: [],
serverAudioOutputDevices: [],
maxInputLength: 128 * 2048,
voiceChangerParams: {
model_dir: "",
},
maxInputLength: 128 * 2048,
voiceChangerParams: {
model_dir: "",
},
};
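
A consumer would normally clone these defaults and override a handful of fields rather than assemble a ServerInfo by hand. A minimal sketch, with arbitrary override values:

```ts
// Clone first so nested arrays (gpus, modelSlots, sampleModels) are not
// shared with the module-level default object.
const setting: ServerInfo = {
    ...structuredClone(DefaultServerSetting),
    f0Detector: F0Detector.rmvpe,
    serverReadChunkSize: 192,
};
```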
///////////////////////
@@ -452,109 +467,113 @@ export const DefaultServerSetting: ServerInfo = {
///////////////////////
export type WorkletSetting = {
numTrancateTreshold: number;
volTrancateThreshold: number;
volTrancateLength: number;
numTrancateTreshold: number;
volTrancateThreshold: number;
volTrancateLength: number;
};
///////////////////////
// Worklet node settings
///////////////////////
export const Protocol = {
sio: "sio",
rest: "rest",
internal: "internal",
sio: "sio",
rest: "rest",
internal: "internal",
} as const;
export type Protocol = (typeof Protocol)[keyof typeof Protocol];
export const SendingSampleRate = {
"48000": 48000,
"44100": 44100,
"24000": 24000,
"48000": 48000,
"44100": 44100,
"24000": 24000,
} as const;
export type SendingSampleRate = (typeof SendingSampleRate)[keyof typeof SendingSampleRate];
export type SendingSampleRate =
(typeof SendingSampleRate)[keyof typeof SendingSampleRate];
export const DownSamplingMode = {
decimate: "decimate",
average: "average",
decimate: "decimate",
average: "average",
} as const;
export type DownSamplingMode = (typeof DownSamplingMode)[keyof typeof DownSamplingMode];
export type DownSamplingMode =
(typeof DownSamplingMode)[keyof typeof DownSamplingMode];
export type WorkletNodeSetting = {
serverUrl: string;
protocol: Protocol;
sendingSampleRate: SendingSampleRate;
inputChunkNum: number;
downSamplingMode: DownSamplingMode;
serverUrl: string;
protocol: Protocol;
sendingSampleRate: SendingSampleRate;
inputChunkNum: number;
downSamplingMode: DownSamplingMode;
};
///////////////////////
// Client settings
///////////////////////
export const SampleRate = {
"48000": 48000,
"48000": 48000,
} as const;
export type SampleRate = (typeof SampleRate)[keyof typeof SampleRate];
export type VoiceChangerClientSetting = {
audioInput: string | MediaStream | null;
sampleRate: SampleRate; // 48000Hz
echoCancel: boolean;
noiseSuppression: boolean;
noiseSuppression2: boolean;
audioInput: string | MediaStream | null;
sampleRate: SampleRate; // 48000Hz
echoCancel: boolean;
noiseSuppression: boolean;
noiseSuppression2: boolean;
inputGain: number;
outputGain: number;
monitorGain: number;
inputGain: number;
outputGain: number;
monitorGain: number;
passThroughConfirmationSkip: boolean;
passThroughConfirmationSkip: boolean;
};
///////////////////////
// Client settings
///////////////////////
export type ClientSetting = {
workletSetting: WorkletSetting;
workletNodeSetting: WorkletNodeSetting;
voiceChangerClientSetting: VoiceChangerClientSetting;
workletSetting: WorkletSetting;
workletNodeSetting: WorkletNodeSetting;
voiceChangerClientSetting: VoiceChangerClientSetting;
};
export const DefaultClientSettng: ClientSetting = {
workletSetting: {
// numTrancateTreshold: 512 * 2,
numTrancateTreshold: 100,
volTrancateThreshold: 0.0005,
volTrancateLength: 32,
},
workletNodeSetting: {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 48000,
inputChunkNum: 192,
downSamplingMode: "average",
},
voiceChangerClientSetting: {
audioInput: null,
sampleRate: 48000,
echoCancel: false,
noiseSuppression: false,
noiseSuppression2: false,
inputGain: 1.0,
outputGain: 1.0,
monitorGain: 1.0,
passThroughConfirmationSkip: false,
},
workletSetting: {
// numTrancateTreshold: 512 * 2,
numTrancateTreshold: 100,
volTrancateThreshold: 0.0005,
volTrancateLength: 32,
},
workletNodeSetting: {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 48000,
inputChunkNum: 192,
downSamplingMode: "average",
},
voiceChangerClientSetting: {
audioInput: null,
sampleRate: 48000,
echoCancel: false,
noiseSuppression: false,
noiseSuppression2: false,
inputGain: 1.0,
outputGain: 1.0,
monitorGain: 1.0,
passThroughConfirmationSkip: false,
},
};
////////////////////////////////////
// Exceptions
////////////////////////////////////
export const VOICE_CHANGER_CLIENT_EXCEPTION = {
ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED",
ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE",
ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED",
ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED: "ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED",
ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED",
ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE",
ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED",
ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED:
"ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED",
} as const;
export type VOICE_CHANGER_CLIENT_EXCEPTION = (typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
export type VOICE_CHANGER_CLIENT_EXCEPTION =
(typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
////////////////////////////////////
// indexedDB
@@ -563,22 +582,23 @@ export const INDEXEDDB_DB_APP_NAME = "INDEXEDDB_KEY_VOICE_CHANGER";
export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB";
export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT";
export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER";
export const INDEXEDDB_KEY_MODEL_DATA = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA";
// ONNX
export type OnnxExporterInfo = {
status: string;
path: string;
filename: string;
};
// Merge
export type MergeElement = {
slotIndex: number;
strength: number;
};
export type MergeModelRequest = {
voiceChangerType: VoiceChangerType;
command: "mix";
files: MergeElement[];
};

View File

@@ -1,231 +1,279 @@
import { useState, useMemo } from "react";
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, OnnxExporterInfo, MergeModelRequest, VoiceChangerType, DefaultServerSetting } from "../const";
import { VoiceChangerClient } from "../VoiceChangerClient";
export const ModelAssetName = {
iconFile: "iconFile",
} as const;
export type ModelAssetName = (typeof ModelAssetName)[keyof typeof ModelAssetName];
export const ModelFileKind = {
    mmvcv13Config: "mmvcv13Config",
    mmvcv13Model: "mmvcv13Model",
    mmvcv15Config: "mmvcv15Config",
    mmvcv15Model: "mmvcv15Model",
    mmvcv15Correspondence: "mmvcv15Correspondence",

    soVitsSvc40Config: "soVitsSvc40Config",
    soVitsSvc40Model: "soVitsSvc40Model",
    soVitsSvc40Cluster: "soVitsSvc40Cluster",

    rvcModel: "rvcModel",
    rvcIndex: "rvcIndex",

    ddspSvcModel: "ddspSvcModel",
    ddspSvcModelConfig: "ddspSvcModelConfig",
    ddspSvcDiffusion: "ddspSvcDiffusion",
    ddspSvcDiffusionConfig: "ddspSvcDiffusionConfig",

    diffusionSVCModel: "diffusionSVCModel",

    beatriceModel: "beatriceModel",

    llvcModel: "llvcModel",
    llvcConfig: "llvcConfig",

    easyVCModel: "easyVCModel",
} as const;
export type ModelFileKind = (typeof ModelFileKind)[keyof typeof ModelFileKind];
export type ModelFile = {
file: File;
kind: ModelFileKind;
dir: string;
};
export type ModelUploadSetting = {
voiceChangerType: VoiceChangerType;
slot: number;
isSampleMode: boolean;
sampleId: string | null;
    files: ModelFile[];
    params: any;
};
export type ModelFileForServer = Omit<ModelFile, "file"> & {
name: string;
kind: ModelFileKind;
};
export type ModelUploadSettingForServer = Omit<ModelUploadSetting, "files"> & {
files: ModelFileForServer[];
};
type AssetUploadSetting = {
slot: number;
name: ModelAssetName;
file: string;
};
export type UseServerSettingProps = {
voiceChangerClient: VoiceChangerClient | null;
};
export type ServerSettingState = {
serverSetting: ServerInfo;
updateServerSettings: (setting: ServerInfo) => Promise<void>;
reloadServerInfo: () => Promise<void>;
    uploadModel: (setting: ModelUploadSetting) => Promise<void>;
    uploadProgress: number;
    isUploading: boolean;

    getOnnx: () => Promise<OnnxExporterInfo>;
    mergeModel: (request: MergeModelRequest) => Promise<ServerInfo>;
    updateModelDefault: () => Promise<ServerInfo>;
    updateModelInfo: (slot: number, key: string, val: string) => Promise<ServerInfo>;
    uploadAssets: (slot: number, name: ModelAssetName, file: File) => Promise<void>;
};
export const useServerSetting = (props: UseServerSettingProps): ServerSettingState => {
    const [serverSetting, _setServerSetting] = useState<ServerInfo>(DefaultServerSetting);
    const setServerSetting = (info: ServerInfo) => {
        if (!info.modelSlots) {
            // The server returned empty info (workaround for the web version).
            return;
        }
        _setServerSetting(info);
    };

    //////////////
    // Settings
    /////////////
    const updateServerSettings = useMemo(() => {
        return async (setting: ServerInfo) => {
            if (!props.voiceChangerClient) return;
            for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
                const k = Object.values(ServerSettingKey)[i] as keyof VoiceChangerServerSetting;
                const cur_v = serverSetting[k];
                const new_v = setting[k];
                if (cur_v != new_v) {
                    const res = await props.voiceChangerClient.updateServerSettings(k, "" + new_v);
                    setServerSetting(res);
                }
            }
        };
    }, [props.voiceChangerClient, serverSetting]);

    //////////////
    // Operations
    /////////////
    const [uploadProgress, setUploadProgress] = useState<number>(0);
    const [isUploading, setIsUploading] = useState<boolean>(false);

    // (e) Model upload
    const _uploadFile2 = useMemo(() => {
        return async (file: File, onprogress: (progress: number, end: boolean) => void, dir: string = "") => {
            if (!props.voiceChangerClient) return;
            const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress);
            const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num);
            console.log("uploaded", num, res);
        };
    }, [props.voiceChangerClient]);

    // New uploader
    const uploadModel = useMemo(() => {
        return async (setting: ModelUploadSetting) => {
            if (!props.voiceChangerClient) {
                return;
            }
            setUploadProgress(0);
            setIsUploading(true);

            if (setting.isSampleMode == false) {
                const progRate = 1 / setting.files.length;
                for (let i = 0; i < setting.files.length; i++) {
                    const progOffset = 100 * i * progRate;
                    await _uploadFile2(
                        setting.files[i].file,
                        (progress: number, _end: boolean) => {
                            setUploadProgress(progress * progRate + progOffset);
                        },
                        setting.files[i].dir
                    );
                }
            }
            const params: ModelUploadSettingForServer = {
                ...setting,
                files: setting.files.map((f) => {
                    return { name: f.file.name, kind: f.kind, dir: f.dir };
                }),
            };
            const loadPromise = props.voiceChangerClient.loadModel(0, false, JSON.stringify(params));
            await loadPromise;

            setUploadProgress(0);
            setIsUploading(false);
            reloadServerInfo();
        };
    }, [props.voiceChangerClient]);

    const uploadAssets = useMemo(() => {
        return async (slot: number, name: ModelAssetName, file: File) => {
            if (!props.voiceChangerClient) return;

            await _uploadFile2(file, (progress: number, _end: boolean) => {
                console.log(progress, _end);
            });
            const assetUploadSetting: AssetUploadSetting = {
                slot,
                name,
                file: file.name,
            };
            await props.voiceChangerClient.uploadAssets(JSON.stringify(assetUploadSetting));
            reloadServerInfo();
        };
    }, [props.voiceChangerClient]);

    const reloadServerInfo = useMemo(() => {
        return async () => {
            if (!props.voiceChangerClient) return;
            const res = await props.voiceChangerClient.getServerSettings();
            setServerSetting(res);
        };
    }, [props.voiceChangerClient]);

    const getOnnx = async () => {
        return props.voiceChangerClient!.getOnnx();
    };

    const mergeModel = async (request: MergeModelRequest) => {
        const serverInfo = await props.voiceChangerClient!.mergeModel(request);
        setServerSetting(serverInfo);
        return serverInfo;
    };

    const updateModelDefault = async () => {
        const serverInfo = await props.voiceChangerClient!.updateModelDefault();
        setServerSetting(serverInfo);
        return serverInfo;
    };

    const updateModelInfo = async (slot: number, key: string, val: string) => {
        const serverInfo = await props.voiceChangerClient!.updateModelInfo(slot, key, val);
        setServerSetting(serverInfo);
        return serverInfo;
    };

    return {
        serverSetting,
        updateServerSettings,
        reloadServerInfo,

        uploadModel,
        uploadProgress,
        isUploading,

        getOnnx,
        mergeModel,
        updateModelDefault,
        updateModelInfo,
        uploadAssets,
    };
};
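
// Usage sketch (illustrative; `client` is an assumed VoiceChangerClient instance, not repo code):
// const { serverSetting, updateServerSettings, uploadProgress, isUploading } = useServerSetting({ voiceChangerClient: client });
// await updateServerSettings({ ...serverSetting, tran: 12 }); // only keys that changed are sent to the server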

View File

@@ -58,6 +58,7 @@ def setupArgParser():
parser.add_argument("--hubert_base", type=str, default="pretrain/hubert_base.pt", help="path to hubert_base model(pytorch)")
parser.add_argument("--hubert_base_jp", type=str, default="pretrain/rinna_hubert_base_jp.pt", help="path to hubert_base_jp model(pytorch)")
parser.add_argument("--hubert_soft", type=str, default="pretrain/hubert/hubert-soft-0d54a1f4.pt", help="path to hubert_soft model(pytorch)")
    parser.add_argument("--whisper_tiny", type=str, default="pretrain/whisper_tiny.pt", help="path to whisper_tiny model(pytorch)")
parser.add_argument("--nsf_hifigan", type=str, default="pretrain/nsf_hifigan/model", help="path to nsf_hifigan model(pytorch)")
parser.add_argument("--crepe_onnx_full", type=str, default="pretrain/crepe_onnx_full.onnx", help="path to crepe_onnx_full")
parser.add_argument("--crepe_onnx_tiny", type=str, default="pretrain/crepe_onnx_tiny.onnx", help="path to crepe_onnx_tiny")
@@ -106,6 +107,7 @@ voiceChangerParams = VoiceChangerParams(
rmvpe=args.rmvpe,
rmvpe_onnx=args.rmvpe_onnx,
sample_mode=args.sample_mode,
whisper_tiny=args.whisper_tiny,
)
vcparams = VoiceChangerParamsManager.get_instance()
vcparams.setParams(voiceChangerParams)
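
# Launch sketch (illustrative; the flag value is just the default path shown above):
#   python3 MMVCServerSIO.py --whisper_tiny pretrain/whisper_tiny.pt
# If the checkpoint is missing, downloadWeight() fetches it to that path (see below).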

View File

@@ -14,6 +14,7 @@ VoiceChangerType: TypeAlias = Literal[
"Diffusion-SVC",
"Beatrice",
"LLVC",
"EasyVC",
]
StaticSlot: TypeAlias = Literal["Beatrice-JVS",]
@@ -56,7 +57,12 @@ def getFrontendPath():
return frontend_path
EmbedderType: TypeAlias = Literal[
    "hubert_base",
    "contentvec",
    "hubert-base-japanese",
    "whisper",
]
class EnumInferenceTypes(Enum):
@@ -70,6 +76,8 @@ class EnumInferenceTypes(Enum):
onnxRVC = "onnxRVC"
onnxRVCNono = "onnxRVCNono"
easyVC = "easyVC"
DiffusionSVCInferenceType: TypeAlias = Literal["combo",]

View File

@@ -141,6 +141,14 @@ class LLVCModelSlot(ModelSlot):
configFile: str = ""
@dataclass
class EasyVCModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "EasyVC"
modelFile: str = ""
version: str = ""
samplingRate: int = -1
ModelSlots: TypeAlias = Union[
ModelSlot,
RVCModelSlot,
@@ -151,6 +159,7 @@ ModelSlots: TypeAlias = Union[
DiffusionSVCModelSlot,
BeatriceModelSlot,
LLVCModelSlot,
EasyVCModelSlot,
]
@@ -188,6 +197,9 @@ def loadSlotInfo(model_dir: str, slotIndex: int | StaticSlot) -> ModelSlots:
elif slotInfo.voiceChangerType == "LLVC":
slotInfoKey.extend(list(LLVCModelSlot.__annotations__.keys()))
return LLVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
elif slotInfo.voiceChangerType == "EasyVC":
slotInfoKey.extend(list(EasyVCModelSlot.__annotations__.keys()))
return EasyVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
else:
return ModelSlot()

View File

@@ -19,9 +19,19 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
crepe_onnx_tiny = voiceChangerParams.crepe_onnx_tiny
rmvpe = voiceChangerParams.rmvpe
rmvpe_onnx = voiceChangerParams.rmvpe_onnx
whisper_tiny = voiceChangerParams.whisper_tiny
    weight_files = [
        content_vec_500_onnx,
        hubert_base,
        hubert_base_jp,
        hubert_soft,
        nsf_hifigan,
        crepe_onnx_full,
        crepe_onnx_tiny,
        rmvpe,
        whisper_tiny,
    ]
# file exists check (currently only for rvc)
downloadParams = []
@@ -119,6 +129,15 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
}
)
if os.path.exists(whisper_tiny) is False:
downloadParams.append(
{
"url": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt",
"saveTo": whisper_tiny,
"position": 10,
}
)
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)

View File

@@ -0,0 +1,326 @@
"""
For VoiceChangerV2
"""
from dataclasses import asdict
import numpy as np
import torch
from data.ModelSlot import RVCModelSlot
from mods.log_control import VoiceChangaerLogger
from voice_changer.EasyVC.EasyVCSettings import EasyVCSettings
from voice_changer.EasyVC.pipeline.Pipeline import Pipeline
from voice_changer.EasyVC.pipeline.PipelineGenerator import createPipeline
from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.utils.Timer import Timer2
from voice_changer.utils.VoiceChangerModel import (
AudioInOut,
PitchfInOut,
FeatureInOut,
VoiceChangerModel,
)
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from Exceptions import (
DeviceCannotSupportHalfPrecisionException,
PipelineCreateException,
PipelineNotInitializedException,
)
import resampy
from typing import cast
logger = VoiceChangaerLogger.get_instance().getLogger()
class EasyVC(VoiceChangerModel):
def __init__(self, params: VoiceChangerParams, slotInfo: RVCModelSlot):
logger.info("[Voice Changer] [EasyVC] Creating instance ")
        self.voiceChangerType = "EasyVC"
self.deviceManager = DeviceManager.get_instance()
EmbedderManager.initialize(params)
PitchExtractorManager.initialize(params)
self.settings = EasyVCSettings()
self.params = params
# self.pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline: Pipeline | None = None
self.audio_buffer: AudioInOut | None = None
self.pitchf_buffer: PitchfInOut | None = None
self.feature_buffer: FeatureInOut | None = None
self.prevVol = 0.0
self.slotInfo = slotInfo
# self.initialize()
def initialize(self):
logger.info("[Voice Changer][EasyVC] Initializing... ")
        # Create the pipeline
try:
self.pipeline = createPipeline(self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector)
except PipelineCreateException as e: # NOQA
logger.error("[Voice Changer] pipeline create failed. check your model is valid.")
return
        # Other settings
logger.info("[Voice Changer] [EasyVC] Initializing... done")
def setSamplingRate(self, inputSampleRate, outputSampleRate):
self.inputSampleRate = inputSampleRate
self.outputSampleRate = outputSampleRate
# self.initialize()
def update_settings(self, key: str, val: int | float | str):
        logger.info(f"[Voice Changer][EasyVC]: update_settings {key}:{val}")
if key in self.settings.intData:
setattr(self.settings, key, int(val))
if key == "gpu":
self.deviceManager.setForceTensor(False)
self.initialize()
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline.setPitchExtractor(pitchExtractor)
else:
return False
return True
def get_info(self):
data = asdict(self.settings)
if self.pipeline is not None:
pipelineInfo = self.pipeline.getPipelineInfo()
data["pipelineInfo"] = pipelineInfo
else:
data["pipelineInfo"] = "None"
return data
def get_processing_sampling_rate(self):
return self.slotInfo.samplingRate
def generate_input(
self,
newData: AudioInOut,
crossfadeSize: int,
solaSearchFrame: int,
extra_frame: int,
):
        # Input arrives at 16 kHz.
inputSize = newData.shape[0]
newData = newData.astype(np.float32) / 32768.0
newFeatureLength = inputSize // 160 # hopsize:=160
if self.audio_buffer is not None:
            # Concatenate onto the past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
# if self.slotInfo.f0:
# self.pitchf_buffer = np.concatenate([self.pitchf_buffer, np.zeros(newFeatureLength)], 0)
self.feature_buffer = np.concatenate(
[
self.feature_buffer,
# np.zeros([newFeatureLength, self.slotInfo.embChannels]),
np.zeros([newFeatureLength, 768]),
],
0,
)
else:
self.audio_buffer = newData
# if self.slotInfo.f0:
# self.pitchf_buffer = np.zeros(newFeatureLength)
self.feature_buffer = np.zeros([newFeatureLength, 768])
convertSize = inputSize + crossfadeSize + solaSearchFrame + extra_frame
        if convertSize % 160 != 0:  # pad, since truncation would otherwise occur at the model's output hop size
convertSize = convertSize + (160 - (convertSize % 160))
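            # e.g. convertSize = 4100 -> 4100 + (160 - 4100 % 160) = 4160, an exact multiple of the 160-sample hop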
outSize = int(((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate)
        # If the buffer has not filled up yet, pad with zeros
if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
# if self.slotInfo.f0:
# self.pitchf_buffer = np.concatenate([np.zeros([convertSize // 160]), self.pitchf_buffer])
self.feature_buffer = np.concatenate(
[
np.zeros([convertSize // 160, 768]),
self.feature_buffer,
]
)
        # Trim off the unneeded part
convertOffset = -1 * convertSize
featureOffset = convertOffset // 160
        self.audio_buffer = self.audio_buffer[convertOffset:]  # extract only the portion to be converted
# if self.slotInfo.f0:
# self.pitchf_buffer = self.pitchf_buffer[featureOffset:]
self.feature_buffer = self.feature_buffer[featureOffset:]
        # Crop just the output portion and check its volume. (TODO: switch to a gradual fade-out)
cropOffset = -1 * (inputSize + crossfadeSize)
cropEnd = -1 * (crossfadeSize)
crop = self.audio_buffer[cropOffset:cropEnd]
vol = np.sqrt(np.square(crop).mean())
vol = max(vol, self.prevVol * 0.0)
self.prevVol = vol
return (
self.audio_buffer,
self.pitchf_buffer,
self.feature_buffer,
convertSize,
vol,
outSize,
)
def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
if self.pipeline is None:
logger.info("[Voice Changer] Pipeline is not initialized.")
raise PipelineNotInitializedException()
enableTimer = True
with Timer2("infer_easyvc", enableTimer) as t:
            # Processing runs at 16 kHz (pitch, embed, (infer))
receivedData = cast(
AudioInOut,
resampy.resample(
receivedData,
self.inputSampleRate,
16000,
filter="kaiser_fast",
),
)
crossfade_frame = int((crossfade_frame / self.inputSampleRate) * 16000)
sola_search_frame = int((sola_search_frame / self.inputSampleRate) * 16000)
extra_frame = int((self.settings.extraConvertSize / self.inputSampleRate) * 16000)
            # Generate the input data
data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame)
t.record("generate_input")
audio = data[0]
pitchf = data[1]
feature = data[2]
convertSize = data[3]
vol = data[4]
outSize = data[5]
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16) * np.sqrt(vol)
device = self.pipeline.device
audio = torch.from_numpy(audio).to(device=device, dtype=torch.float32)
repeat = 0
sid = self.settings.dstId
f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio
protect = self.settings.protect
# if_f0 = 1 if self.slotInfo.f0 else 0
if_f0 = 0
# embOutputLayer = self.slotInfo.embOutputLayer
# useFinalProj = self.slotInfo.useFinalProj
t.record("pre-process")
try:
audio_out, self.pitchf_buffer, self.feature_buffer = self.pipeline.exec(
sid,
audio,
pitchf,
feature,
f0_up_key,
index_rate,
if_f0,
# 0,
                    self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0.0,  # seconds of extra data, computed at the input sampling rate
repeat,
outSize,
)
t.record("pipeline-exec")
# result = audio_out.detach().cpu().numpy() * np.sqrt(vol)
result = audio_out[-outSize:].detach().cpu().numpy() * np.sqrt(vol)
result = cast(
AudioInOut,
resampy.resample(
result,
16000,
self.outputSampleRate,
filter="kaiser_fast",
),
)
t.record("resample")
return result
except DeviceCannotSupportHalfPrecisionException as e: # NOQA
logger.warn("[Device Manager] Device cannot support half precision. Fallback to float....")
self.deviceManager.setForceTensor(True)
self.initialize()
# raise e
return
def __del__(self):
del self.pipeline
# print("---------- REMOVING ---------------")
# remove_path = os.path.join("RVC")
# sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
# for key in list(sys.modules):
# val = sys.modules.get(key)
# try:
# file_path = val.__file__
# if file_path.find("RVC" + os.path.sep) >= 0:
# # print("remove", key, file_path)
# sys.modules.pop(key)
# except Exception: # type:ignore
# # print(e)
# pass
def export2onnx(self):
modelSlot = self.slotInfo
if modelSlot.isONNX:
logger.warn("[Voice Changer] export2onnx, No pyTorch filepath.")
return {"status": "ng", "path": ""}
if self.pipeline is not None:
del self.pipeline
self.pipeline = None
torch.cuda.empty_cache()
self.initialize()
output_file_simple = export2onnx(self.settings.gpu, modelSlot)
return {
"status": "ok",
"path": f"/tmp/{output_file_simple}",
"filename": output_file_simple,
}
def get_model_current(self):
return [
{
"key": "defaultTune",
"val": self.settings.tran,
},
{
"key": "defaultIndexRatio",
"val": self.settings.indexRatio,
},
{
"key": "defaultProtect",
"val": self.settings.protect,
},
]

View File

@@ -0,0 +1,17 @@
import os
from data.ModelSlot import EasyVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
class EasyVCModelSlotGenerator(ModelSlotGenerator):
@classmethod
def loadModel(cls, props: LoadModelParams):
slotInfo: EasyVCModelSlot = EasyVCModelSlot()
for file in props.files:
if file.kind == "easyVCModel":
slotInfo.modelFile = file.name
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
slotInfo.slotIndex = props.slot
return slotInfo
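
# Expected call-site sketch (inferred from the other *ModelSlotGenerator classes, not shown in this diff):
#   slotInfo = EasyVCModelSlotGenerator.loadModel(props)  # props.files holds one "easyVCModel" entry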

View File

@@ -0,0 +1,33 @@
from dataclasses import dataclass, field
from const import PitchExtractorType
@dataclass
class EasyVCSettings:
gpu: int = -9999
dstId: int = 0
    f0Detector: PitchExtractorType = "rmvpe_onnx"  # pitch extractor to use (e.g. dio, harvest, rmvpe_onnx)
tran: int = 12
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 4
indexRatio: float = 0
protect: float = 0.5
rvcQuality: int = 0
silenceFront: int = 1 # 0:off, 1:on
modelSamplingRate: int = 48000
speakers: dict[str, int] = field(default_factory=lambda: {})
intData = [
"gpu",
"dstId",
"tran",
"extraConvertSize",
"rvcQuality",
"silenceFront",
]
floatData = ["silentThreshold", "indexRatio", "protect"]
strData = ["f0Detector"]
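
    # gpu = -9999 above appears to act as a "no device selected yet" sentinel (the same
    # convention as RVCSettings); this reading is inferred from the code, not documented.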

View File

@@ -0,0 +1,237 @@
from typing import Any
import math
import torch
import torch.nn.functional as F
from torch.cuda.amp import autocast
from Exceptions import (
DeviceCannotSupportHalfPrecisionException,
DeviceChangingException,
HalfPrecisionChangingException,
NotEnoughDataExtimateF0,
)
from mods.log_control import VoiceChangaerLogger
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
from voice_changer.utils.Timer import Timer2
logger = VoiceChangaerLogger.get_instance().getLogger()
class Pipeline(object):
embedder: Embedder
inferencer: Inferencer
pitchExtractor: PitchExtractor
index: Any | None
big_npy: Any | None
# feature: Any | None
targetSR: int
device: torch.device
isHalf: bool
def __init__(
self,
embedder: Embedder,
inferencer: Inferencer,
pitchExtractor: PitchExtractor,
targetSR,
device,
isHalf,
):
self.embedder = embedder
self.inferencer = inferencer
self.pitchExtractor = pitchExtractor
logger.info("GENERATE INFERENCER" + str(self.inferencer))
logger.info("GENERATE EMBEDDER" + str(self.embedder))
logger.info("GENERATE PITCH EXTRACTOR" + str(self.pitchExtractor))
self.targetSR = targetSR
self.device = device
self.isHalf = isHalf
self.sr = 16000
self.window = 160
def getPipelineInfo(self):
inferencerInfo = self.inferencer.getInferencerInfo() if self.inferencer else {}
embedderInfo = self.embedder.getEmbedderInfo()
pitchExtractorInfo = self.pitchExtractor.getPitchExtractorInfo()
return {"inferencer": inferencerInfo, "embedder": embedderInfo, "pitchExtractor": pitchExtractorInfo, "isHalf": self.isHalf}
def setPitchExtractor(self, pitchExtractor: PitchExtractor):
self.pitchExtractor = pitchExtractor
def extractPitch(self, audio_pad, if_f0, pitchf, f0_up_key, silence_front):
try:
if if_f0 == 1:
pitch, pitchf = self.pitchExtractor.extract(
audio_pad,
pitchf,
f0_up_key,
self.sr,
self.window,
silence_front=silence_front,
)
# pitch = pitch[:p_len]
# pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
else:
pitch = None
pitchf = None
except IndexError as e: # NOQA
print(e)
import traceback
traceback.print_exc()
raise NotEnoughDataExtimateF0()
return pitch, pitchf
def extractFeatures(self, feats):
with autocast(enabled=self.isHalf):
try:
feats = self.embedder.extractFeatures(feats)
if torch.isnan(feats).all():
raise DeviceCannotSupportHalfPrecisionException()
return feats
except RuntimeError as e:
if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException()
elif "same device" in e.__str__():
raise DeviceChangingException()
else:
raise e
def infer(self, feats, p_len, pitch, pitchf, sid, out_size):
try:
with torch.no_grad():
with autocast(enabled=self.isHalf):
audio1 = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)
audio1 = (audio1 * 32767.5).data.to(dtype=torch.int16)
return audio1
except RuntimeError as e:
if "HALF" in e.__str__().upper():
                print("HalfPrecision Error:", e)
raise HalfPrecisionChangingException()
else:
raise e
def exec(
self,
sid,
audio, # torch.tensor [n]
pitchf, # np.array [m]
feature, # np.array [m, feat]
f0_up_key,
index_rate,
if_f0,
silence_front,
repeat,
out_size=None,
):
# print(f"pipeline exec input, audio:{audio.shape}, pitchf:{pitchf.shape}, feature:{feature.shape}")
# print(f"pipeline exec input, silence_front:{silence_front}, out_size:{out_size}")
enablePipelineTimer = True
with Timer2("Pipeline-Exec", enablePipelineTimer) as t: # NOQA
            # Audio arrives at a 16000 Hz sampling rate; everything from here on works at 16000 Hz.
            # self.t_pad = self.sr * repeat  # 1 second
            # self.t_pad_tgt = self.targetSR * repeat  # 1 second; trimmed at output time (output comes out at the model's sampling rate)
audio = audio.unsqueeze(0)
            quality_padding_sec = (repeat * (audio.shape[1] - 1)) / self.sr  # the reflect padding size must be smaller than the original size.
            self.t_pad = round(self.sr * quality_padding_sec)  # add audio before and after
            self.t_pad_tgt = round(self.targetSR * quality_padding_sec)  # add audio before and after; trimmed at output time (output comes out at the model's sampling rate)
audio_pad = F.pad(audio, (self.t_pad, self.t_pad), mode="reflect").squeeze(0)
p_len = audio_pad.shape[0] // self.window
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
            # # Turn silence_front off when RVC Quality is on.
# silence_front = silence_front if repeat == 0 else 0
# pitchf = pitchf if repeat == 0 else np.zeros(p_len)
# out_size = out_size if repeat == 0 else None
            # Adjust tensor shape/type
feats = audio_pad
if feats.dim() == 2: # double channels
feats = feats.mean(-1)
assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1)
t.record("pre-process")
            # Pitch detection
pitch, pitchf = self.extractPitch(audio_pad, if_f0, pitchf, f0_up_key, silence_front)
t.record("extract-pitch")
# embedding
feats = self.extractFeatures(feats)
t.record("extract-feats")
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
# if protect < 0.5 and search_index:
# feats0 = feats.clone()
            # Adjust pitch length
p_len = audio_pad.shape[0] // self.window
if feats.shape[1] < p_len:
p_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, :p_len]
pitchf = pitchf[:, :p_len]
feats_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, -feats_len:]
pitchf = pitchf[:, -feats_len:]
p_len = torch.tensor([feats_len], device=self.device).long()
# apply silent front for inference
if type(self.inferencer) in [OnnxRVCInferencer, OnnxRVCInferencerNono]:
npyOffset = math.floor(silence_front * 16000) // 360
feats = feats[:, npyOffset * 2 :, :] # NOQA
feats_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, -feats_len:]
pitchf = pitchf[:, -feats_len:]
p_len = torch.tensor([feats_len], device=self.device).long()
            t.record("mid-process")

            # Run inference
audio1 = self.infer(feats, p_len, pitch, pitchf, sid, out_size)
t.record("infer")
feats_buffer = feats.squeeze(0).detach().cpu()
if pitchf is not None:
pitchf_buffer = pitchf.squeeze(0).detach().cpu()
else:
pitchf_buffer = None
del p_len, pitch, pitchf, feats
# torch.cuda.empty_cache()
            # The sampling rate coming out of infer is the model's sampling rate.
            # Audio going into the pipeline is 16 kHz, for hubert.
if self.t_pad_tgt != 0:
offset = self.t_pad_tgt
end = -1 * self.t_pad_tgt
audio1 = audio1[offset:end]
del sid
t.record("post-process")
# torch.cuda.empty_cache()
# print("EXEC AVERAGE:", t.avrSecs)
return audio1, pitchf_buffer, feats_buffer
def __del__(self):
del self.embedder
del self.inferencer
del self.pitchExtractor
print("Pipeline has been deleted")

View File

@@ -0,0 +1,52 @@
import os
import traceback
from Exceptions import PipelineCreateException
from const import EnumInferenceTypes, PitchExtractorType
from data.ModelSlot import EasyVCModelSlot
from voice_changer.EasyVC.pipeline.Pipeline import Pipeline
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
def createPipeline(params: VoiceChangerParams, modelSlot: EasyVCModelSlot, gpu: int, f0Detector: PitchExtractorType):
dev = DeviceManager.get_instance().getDevice(gpu)
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
    # Create the inferencer
try:
modelPath = os.path.join(params.model_dir, str(modelSlot.slotIndex), os.path.basename(modelSlot.modelFile))
inferencer = InferencerManager.getInferencer(EnumInferenceTypes.easyVC, modelPath, gpu, modelSlot.version)
except Exception as e:
print("[Voice Changer] exception! loading inferencer", e)
traceback.print_exc()
raise PipelineCreateException("[Voice Changer] exception! loading inferencer")
    # Create the embedder
try:
embedder = EmbedderManager.getEmbedder(
"whisper",
half,
dev,
)
except Exception as e:
print("[Voice Changer] exception! loading embedder", e, dev)
traceback.print_exc()
raise PipelineCreateException("[Voice Changer] exception! loading embedder")
# pitchExtractor
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector, gpu)
pipeline = Pipeline(
embedder,
inferencer,
pitchExtractor,
modelSlot.samplingRate,
dev,
half,
)
return pipeline

View File

@@ -1,6 +1,6 @@
import os
from data.ModelSlot import BeatriceModelSlot, LLVCModelSlot
from data.ModelSlot import LLVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator

View File

@@ -1,6 +1,7 @@
"""
For VoiceChangerV2
"""
from dataclasses import asdict
import numpy as np
import torch
@@ -59,13 +60,9 @@ class RVCr2(VoiceChangerModel):
        # Create the pipeline
        try:
            self.pipeline = createPipeline(self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector)
        except PipelineCreateException as e:  # NOQA
            logger.error("[Voice Changer] pipeline create failed. check your model is valid.")
            return

        # Other settings
@@ -91,9 +88,7 @@ class RVCr2(VoiceChangerModel):
        elif key in self.settings.strData:
            setattr(self.settings, key, str(val))
            if key == "f0Detector" and self.pipeline is not None:
                pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
                self.pipeline.setPitchExtractor(pitchExtractor)
        else:
            return False
@@ -127,9 +122,7 @@ class RVCr2(VoiceChangerModel):
            # Concatenate onto the past data
            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
            if self.slotInfo.f0:
                self.pitchf_buffer = np.concatenate([self.pitchf_buffer, np.zeros(newFeatureLength)], 0)
            self.feature_buffer = np.concatenate(
                [
                    self.feature_buffer,
@@ -141,27 +134,19 @@ class RVCr2(VoiceChangerModel):
            self.audio_buffer = newData
            if self.slotInfo.f0:
                self.pitchf_buffer = np.zeros(newFeatureLength)
            self.feature_buffer = np.zeros([newFeatureLength, self.slotInfo.embChannels])

        convertSize = inputSize + crossfadeSize + solaSearchFrame + extra_frame
        if convertSize % 160 != 0:  # pad, since truncation would otherwise occur at the model's output hop size
            convertSize = convertSize + (160 - (convertSize % 160))
        outSize = int(((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate)

        # If the buffer has not filled up yet, pad with zeros
        if self.audio_buffer.shape[0] < convertSize:
            self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
            if self.slotInfo.f0:
                self.pitchf_buffer = np.concatenate([np.zeros([convertSize // 160]), self.pitchf_buffer])
            self.feature_buffer = np.concatenate(
                [
                    np.zeros([convertSize // 160, self.slotInfo.embChannels]),
@@ -194,9 +179,7 @@ class RVCr2(VoiceChangerModel):
            outSize,
        )

    def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
        if self.pipeline is None:
            logger.info("[Voice Changer] Pipeline is not initialized.")
            raise PipelineNotInitializedException()
@@ -208,18 +191,15 @@ class RVCr2(VoiceChangerModel):
                receivedData,
                self.inputSampleRate,
                16000,
                filter="kaiser_fast",
            ),
        )
        crossfade_frame = int((crossfade_frame / self.inputSampleRate) * 16000)
        sola_search_frame = int((sola_search_frame / self.inputSampleRate) * 16000)
        extra_frame = int((self.settings.extraConvertSize / self.inputSampleRate) * 16000)

        # Generate the input data
        data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame)

        audio = data[0]
        pitchf = data[1]
@@ -254,9 +234,7 @@ class RVCr2(VoiceChangerModel):
                    index_rate,
                    if_f0,
                    # 0,
                    self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0.0,  # seconds of extra data, computed at the input sampling rate
                    embOutputLayer,
                    useFinalProj,
                    repeat,
@@ -272,14 +250,13 @@ class RVCr2(VoiceChangerModel):
                    result,
                    self.slotInfo.samplingRate,
                    self.outputSampleRate,
                    filter="kaiser_fast",
                ),
            )
            return result
        except DeviceCannotSupportHalfPrecisionException as e:  # NOQA
            logger.warn("[Device Manager] Device cannot support half precision. Fallback to float....")
            self.deviceManager.setForceTensor(True)
            self.initialize()
            # raise e

View File

@@ -15,14 +15,6 @@ class Embedder(EmbedderProtocol):
self.model: Any | None = None
def loadModel(self, file: str, dev: device, isHalf: bool = True):
...
    def extractFeatures(self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True) -> torch.Tensor:
...
def getEmbedderInfo(self):
return {
"embedderType": self.embedderType,

View File

@@ -6,6 +6,7 @@ from voice_changer.RVC.embedder.FairseqContentvec import FairseqContentvec
from voice_changer.RVC.embedder.FairseqHubert import FairseqHubert
from voice_changer.RVC.embedder.FairseqHubertJp import FairseqHubertJp
from voice_changer.RVC.embedder.OnnxContentvec import OnnxContentvec
from voice_changer.RVC.embedder.Whisper import Whisper
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
@@ -18,9 +19,7 @@ class EmbedderManager:
cls.params = params
@classmethod
    def getEmbedder(cls, embederType: EmbedderType, isHalf: bool, dev: device) -> Embedder:
if cls.currentEmbedder is None:
print("[Voice Changer] generate new embedder. (no embedder)")
cls.currentEmbedder = cls.loadEmbedder(embederType, isHalf, dev)
@@ -36,9 +35,7 @@ class EmbedderManager:
return cls.currentEmbedder
@classmethod
    def loadEmbedder(cls, embederType: EmbedderType, isHalf: bool, dev: device) -> Embedder:
if embederType == "hubert_base":
try:
if cls.params.content_vec_500_onnx_on is False:
@@ -62,5 +59,8 @@ class EmbedderManager:
print(e)
file = cls.params.hubert_base
return FairseqContentvec().loadModel(file, dev, isHalf)
elif embederType == "whisper":
file = cls.params.whisper_tiny
return Whisper().loadModel(file, dev, isHalf)
else:
return FairseqHubert().loadModel(file, dev, isHalf)

View File

@@ -0,0 +1,53 @@
import torch
from torch import device
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.embedder.whisper.audio import log_mel_spectrogram
from .whisper.whisper import load_model
import numpy as np
import torch.nn.functional as F
class Whisper(Embedder):
def loadModel(self, file: str, dev: device, isHalf: bool = True) -> Embedder:
super().setProps("whisper", file, dev, isHalf)
whisper = load_model(file).to(dev)
self.model = whisper
return self
def extractFeatures(self, audio: torch.Tensor) -> torch.Tensor:
try:
if isinstance(audio, np.ndarray):
audio = torch.from_numpy(audio.astype(np.float32))
audio = audio.to(self.dev)
# if self.isHalf and audio.dtype != torch.float16:
# audio = audio.half()
if self.isHalf is False and audio.dtype != torch.float32:
audio = audio.float()
if audio.dim() != 1:
audio = audio.squeeze(0)
if audio.dim() != 1:
                raise RuntimeError(f"Exception in {self.__class__.__name__}: audio.dim is not 1 (dim: {audio.dim()}, shape: {audio.shape})")
audln = audio.shape[0]
ppgln = audln // 320
mel = log_mel_spectrogram(audio).to(self.model.device)
# print(f"[whisper_ppg] audio:{audio.shape}({audio.shape[0]/16000}ms) -> ppg:{ppgln}")
# print(f"[whisper_ppg] mel:{mel.shape}({mel.dtype})")
with torch.no_grad():
ppg = self.model.encoder(mel.unsqueeze(0))
padding = (0, 384)
ppg_padded = F.pad(ppg, padding, "constant", 0)
ppg_padded = ppg_padded.data
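            # The tiny encoder emits 384-dim frames, and F.pad(..., (0, 384)) zero-extends the
            # last dimension to 768, apparently to match the 768-wide feature buffer allocated
            # in EasyVC.generate_input; that correspondence is inferred from this diff, not documented.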
# print(f"[whisper_ppg] ppg:{ppg.shape}")
except Exception as e:
print(e)
            raise RuntimeError(f"Exception in {self.__class__.__name__}", e)
# raise EmbedderProcessException(f"Exeption in {self.__class__.__name__}", e)
return ppg_padded

View File

@@ -0,0 +1,120 @@
import os
from functools import lru_cache
from typing import Union
import numpy as np
import torch
import torch.nn.functional as F
from voice_changer.RVC.embedder.whisper.utils import exact_div
# hard-coded audio hyperparameters
SAMPLE_RATE = 16000
N_FFT = 400
N_MELS = 80
HOP_LENGTH = 160
CHUNK_LENGTH = 30
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000: number of samples in a chunk
N_FRAMES = exact_div(N_SAMPLES, HOP_LENGTH) # 3000: number of frames in a mel spectrogram input
# def load_audio(file: str, sr: int = SAMPLE_RATE):
# """
# Open an audio file and read as mono waveform, resampling as necessary
# Parameters
# ----------
# file: str
# The audio file to open
# sr: int
# The sample rate to resample the audio if necessary
# Returns
# -------
# A NumPy array containing the audio waveform, in float32 dtype.
# """
# try:
# # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
# # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
# out, _ = ffmpeg.input(file, threads=0).output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr).run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
# except ffmpeg.Error as e:
# raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
# return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
"""
Pad or trim the audio array to N_SAMPLES, as expected by the encoder.
"""
if torch.is_tensor(array):
if array.shape[axis] > length:
array = array.index_select(dim=axis, index=torch.arange(length, device=array.device))
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = F.pad(array, [pad for sizes in pad_widths[::-1] for pad in sizes])
else:
if array.shape[axis] > length:
array = array.take(indices=range(length), axis=axis)
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = np.pad(array, pad_widths)
return array
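
# e.g. pad_or_trim(np.zeros(16000)).shape == (480000,): shorter inputs are zero-padded up to
# CHUNK_LENGTH * SAMPLE_RATE samples, longer ones are cut at that length.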
@lru_cache(maxsize=None)
def mel_filters(device, n_mels: int = N_MELS) -> torch.Tensor:
"""
load the mel filterbank matrix for projecting STFT into a Mel spectrogram.
Allows decoupling librosa dependency; saved using:
np.savez_compressed(
"mel_filters.npz",
mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
)
"""
assert n_mels == 80, f"Unsupported n_mels: {n_mels}"
with np.load(os.path.join(os.path.dirname(__file__), "assets", "mel_filters.npz")) as f:
return torch.from_numpy(f[f"mel_{n_mels}"]).to(device)
def log_mel_spectrogram(audio: Union[str, np.ndarray, torch.Tensor], n_mels: int = N_MELS):
"""
    Compute the log-Mel spectrogram of the given audio.
Parameters
----------
audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz
n_mels: int
The number of Mel-frequency filters, only 80 is supported
Returns
-------
torch.Tensor, shape = (80, n_frames)
A Tensor that contains the Mel spectrogram
"""
if not torch.is_tensor(audio):
if isinstance(audio, str):
            audio = load_audio(audio)  # NOTE: load_audio is commented out above, so passing a path here would fail
audio = torch.from_numpy(audio)
window = torch.hann_window(N_FFT).to(audio.device) # type: ignore
stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True) # type: ignore
magnitudes = stft[..., :-1].abs() ** 2
filters = mel_filters(audio.device, n_mels) # type: ignore
mel_spec = filters @ magnitudes
log_spec = torch.clamp(mel_spec, min=1e-10).log10()
log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
log_spec = (log_spec + 4.0) / 4.0
return log_spec
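
# Shape sketch (illustrative): one second of 16 kHz audio yields 100 mel frames, since
# torch.stft with hop 160 produces 101 frames and the last one is dropped above:
#   x = torch.randn(16000)
#   log_mel_spectrogram(x).shape  # torch.Size([80, 100])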

View File

@@ -0,0 +1,222 @@
from dataclasses import dataclass
from typing import Dict
from typing import Iterable, Optional
import numpy as np
import torch
import torch.nn.functional as F
from torch import Tensor
from torch import nn
# from .decoding import detect_language as detect_language_function, decode as decode_function
@dataclass
class ModelDimensions:
n_mels: int
n_audio_ctx: int
n_audio_state: int
n_audio_head: int
n_audio_layer: int
n_vocab: int
n_text_ctx: int
n_text_state: int
n_text_head: int
n_text_layer: int
class LayerNorm(nn.LayerNorm):
def forward(self, x: Tensor) -> Tensor:
return super().forward(x.float()).type(x.dtype)
class Linear(nn.Linear):
def forward(self, x: Tensor) -> Tensor:
return F.linear(x, self.weight.to(x.dtype), None if self.bias is None else self.bias.to(x.dtype))
class Conv1d(nn.Conv1d):
def _conv_forward(self, x: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor:
return super()._conv_forward(x, weight.to(x.dtype), None if bias is None else bias.to(x.dtype))
def sinusoids(length, channels, max_timescale=10000):
"""Returns sinusoids for positional embedding"""
assert channels % 2 == 0
log_timescale_increment = np.log(max_timescale) / (channels // 2 - 1)
inv_timescales = torch.exp(-log_timescale_increment * torch.arange(channels // 2))
scaled_time = torch.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
return torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)
class MultiHeadAttention(nn.Module):
def __init__(self, n_state: int, n_head: int):
super().__init__()
self.n_head = n_head
self.query = Linear(n_state, n_state)
self.key = Linear(n_state, n_state, bias=False)
self.value = Linear(n_state, n_state)
self.out = Linear(n_state, n_state)
def forward(
self,
x: Tensor,
xa: Optional[Tensor] = None,
mask: Optional[Tensor] = None,
kv_cache: Optional[dict] = None,
):
q = self.query(x)
if kv_cache is None or xa is None or self.key not in kv_cache:
# hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
# otherwise, perform key/value projections for self- or cross-attention as usual.
k = self.key(x if xa is None else xa)
v = self.value(x if xa is None else xa)
else:
# for cross-attention, calculate keys and values once and reuse in subsequent calls.
k = kv_cache[self.key]
v = kv_cache[self.value]
wv, qk = self.qkv_attention(q, k, v, mask)
return self.out(wv), qk
def qkv_attention(self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None):
n_batch, n_ctx, n_state = q.shape
scale = (n_state // self.n_head) ** -0.25
q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3) * scale
k = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 3, 1) * scale
v = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
qk = q @ k
if mask is not None:
qk = qk + mask[:n_ctx, :n_ctx]
qk = qk.float()
w = F.softmax(qk, dim=-1).to(q.dtype)
return (w @ v).permute(0, 2, 1, 3).flatten(start_dim=2), qk.detach()
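    # Note: scaling q and k by (n_state // n_head) ** -0.25 each multiplies qk by
    # d_head ** -0.5 overall, i.e. the standard 1/sqrt(d_k) attention scaling.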
class ResidualAttentionBlock(nn.Module):
def __init__(self, n_state: int, n_head: int, cross_attention: bool = False):
super().__init__()
self.attn = MultiHeadAttention(n_state, n_head)
self.attn_ln = LayerNorm(n_state)
self.cross_attn = MultiHeadAttention(n_state, n_head) if cross_attention else None
self.cross_attn_ln = LayerNorm(n_state) if cross_attention else None
n_mlp = n_state * 4
self.mlp = nn.Sequential(Linear(n_state, n_mlp), nn.GELU(), Linear(n_mlp, n_state))
self.mlp_ln = LayerNorm(n_state)
def forward(
self,
x: Tensor,
xa: Optional[Tensor] = None,
mask: Optional[Tensor] = None,
kv_cache: Optional[dict] = None,
):
x = x + self.attn(self.attn_ln(x), mask=mask, kv_cache=kv_cache)[0]
if self.cross_attn and self.cross_attn_ln:
x = x + self.cross_attn(self.cross_attn_ln(x), xa, kv_cache=kv_cache)[0]
x = x + self.mlp(self.mlp_ln(x))
return x
class AudioEncoder(nn.Module):
def __init__(self, n_mels: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
super().__init__()
self.conv1 = Conv1d(n_mels, n_state, kernel_size=3, padding=1)
self.conv2 = Conv1d(n_state, n_state, kernel_size=3, stride=2, padding=1)
self.register_buffer("positional_embedding", sinusoids(n_ctx, n_state))
self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList([ResidualAttentionBlock(n_state, n_head) for _ in range(n_layer)])
self.ln_post = LayerNorm(n_state)
def forward(self, x: Tensor):
"""
x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
the mel spectrogram of the audio
"""
x = F.gelu(self.conv1(x))
x = F.gelu(self.conv2(x))
x = x.permute(0, 2, 1)
x = (x[:, :, :] + self.positional_embedding[: x.shape[1], :]).to(x.dtype)
        for block in self.blocks:
x = block(x)
x = self.ln_post(x)
return x
class TextDecoder(nn.Module):
def __init__(self, n_vocab: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
super().__init__()
self.token_embedding = nn.Embedding(n_vocab, n_state)
self.positional_embedding = nn.Parameter(torch.empty(n_ctx, n_state))
self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList([ResidualAttentionBlock(n_state, n_head, cross_attention=True) for _ in range(n_layer)])
self.ln = LayerNorm(n_state)
mask = torch.empty(n_ctx, n_ctx).fill_(-np.inf).triu_(1)
self.register_buffer("mask", mask, persistent=False)
def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None):
"""
x : torch.LongTensor, shape = (batch_size, <= n_ctx)
the text tokens
xa : torch.Tensor, shape = (batch_size, n_mels, n_audio_ctx)
the encoded audio features to be attended on
"""
offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0
x = self.token_embedding(x) + self.positional_embedding[offset : offset + x.shape[-1]]
x = x.to(xa.dtype)
for block in self.blocks:
x = block(x, xa, mask=self.mask, kv_cache=kv_cache)
x = self.ln(x)
logits = (x @ torch.transpose(self.token_embedding.weight.to(x.dtype), 0, 1)).float()
return logits
class Whisper(nn.Module):
def __init__(self, dims: ModelDimensions):
super().__init__()
self.dims = dims
self.encoder = AudioEncoder(
self.dims.n_mels,
self.dims.n_audio_ctx,
self.dims.n_audio_state,
self.dims.n_audio_head,
self.dims.n_audio_layer,
)
self.decoder = TextDecoder(
self.dims.n_vocab,
self.dims.n_text_ctx,
self.dims.n_text_state,
self.dims.n_text_head,
self.dims.n_text_layer,
)
def embed_audio(self, mel: torch.Tensor):
return self.encoder(mel)
def logits(self, tokens: torch.Tensor, audio_features: torch.Tensor):
return self.decoder(tokens, audio_features)
def forward(self, mel: torch.Tensor, tokens: torch.Tensor) -> Dict[str, torch.Tensor]:
return self.decoder(tokens, self.encoder(mel))
@property
def device(self):
return next(self.parameters()).device
@property
def is_multilingual(self):
return self.dims.n_vocab == 51865

View File

@@ -0,0 +1,22 @@
import sys
system_encoding = sys.getdefaultencoding()
if system_encoding != "utf-8":
def make_safe(string):
# replaces any character not representable using the system default encoding with an '?',
# avoiding UnicodeEncodeError (https://github.com/openai/whisper/discussions/729).
return string.encode(system_encoding, errors="replace").decode(system_encoding)
else:
def make_safe(string):
# utf-8 can encode any Unicode code point, so no need to do the round-trip encoding
return string
def exact_div(x, y):
assert x % y == 0
return x // y

View File

@@ -0,0 +1,208 @@
# from whisper_ppg.model import Whisper, ModelDimensions
# from whisper_ppg_custom._LightWhisper import LightWhisper
# from whisper_ppg_custom.Timer import Timer2
# from whisper_ppg_custom.whisper_ppg.audio import load_audio, pad_or_trim, log_mel_spectrogram
# from whisper_ppg_custom.whisper_ppg.model import Whisper, ModelDimensions
import torch
# import numpy as np
# from easy_vc_dev.utils.whisper.audio import load_audio, pad_or_trim
from .model import ModelDimensions, Whisper
# import onnx
# from onnxsim import simplify
# import json
# import onnxruntime
def load_model(path) -> Whisper:
device = "cpu"
checkpoint = torch.load(path, map_location=device)
dims = ModelDimensions(**checkpoint["dims"])
model = Whisper(dims)
model.load_state_dict(checkpoint["model_state_dict"])
model = model.to(device)
return model
# def pred_ppg(whisper: Whisper, wavPath: str, ppgPath: str):
# print("pred")
# # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
# audio = load_audio(wavPath)
# audln = audio.shape[0]
# ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# audio = audio[:400000]
# print("audio.shape2", audio.shape)
# print(f"whisper.device {whisper.device}")
# for i in range(5):
# with Timer2("mainPorcess timer", True) as t:
# mel = log_mel_spectrogram(audio).to(whisper.device)
# with torch.no_grad():
# ppg = whisper.encoder(mel.unsqueeze(0)).squeeze().data.cpu().float().numpy()
# print("ppg.shape", ppg.shape)
# ppg = ppg[:ppgln,]
# print(ppg.shape)
# np.save(ppgPath, ppg, allow_pickle=False)
# t.record("fin")
# print("res", ppg)
# def pred_ppg_onnx(wavPath, ppgPath):
#     print("pred")
#     # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
#     whisper = load_model("tiny.pt")
#     audio = load_audio(wavPath)
#     # audln = audio.shape[0]
#     # ppgln = audln // 320
#     print("audio.shape1", audio.shape, audio.shape[0] / 16000)
#     audio = pad_or_trim(audio)
#     audio = audio[:1000]
#     print("audio.shape2", audio.shape)
#     print(f"whisper.device {whisper.device}")
#     onnx_session = onnxruntime.InferenceSession(
#         "wencoder_sim.onnx",
#         providers=["CPUExecutionProvider"],
#         provider_options=[
#             {
#                 "intra_op_num_threads": 8,
#                 "execution_mode": onnxruntime.ExecutionMode.ORT_PARALLEL,
#                 "inter_op_num_threads": 8,
#             }
#         ],
#     )
#     for i in range(5):
#         with Timer2("mainProcess timer", True) as t:
#             mel = log_mel_spectrogram(audio).to(whisper.device).unsqueeze(0)
#             onnx_res = onnx_session.run(
#                 ["ppg"],
#                 {
#                     "mel": mel.cpu().numpy(),
#                 },
#             )
#             t.record("fin")
#             print("onnx_res", onnx_res)
# def export_encoder(wavPath, ppgPath):
#     print("pred")
#     # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
#     whisper = load_model("tiny.pt")
#     audio = load_audio(wavPath)
#     # audln = audio.shape[0]
#     # ppgln = audln // 320
#     print("audio.shape1", audio.shape, audio.shape[0] / 16000)
#     audio = pad_or_trim(audio)
#     print("audio.shape2", audio.shape)
#     print(f"whisper.device {whisper.device}")
#     mel = log_mel_spectrogram(audio).to(whisper.device).unsqueeze(0)
#     input_names = ["mel"]
#     output_names = ["ppg"]
#     torch.onnx.export(
#         whisper.encoder,
#         (mel,),
#         "wencoder.onnx",
#         dynamic_axes={
#             "mel": [2],
#         },
#         do_constant_folding=False,
#         opset_version=17,
#         verbose=False,
#         input_names=input_names,
#         output_names=output_names,
#     )
#     metadata = {
#         "application": "VC_CLIENT",
#         "version": "2.1",
#     }
#     model_onnx2 = onnx.load("wencoder.onnx")
#     model_simp, check = simplify(model_onnx2)
#     meta = model_simp.metadata_props.add()
#     meta.key = "metadata"
#     meta.value = json.dumps(metadata)
#     onnx.save(model_simp, "wencoder_sim.onnx")
# def pred_ppg_onnx_w(wavPath, ppgPath):
#     print("pred")
#     audio = load_audio(wavPath)
#     print("audio.shape1", audio.shape, audio.shape[0] / 16000)
#     audio = pad_or_trim(audio)
#     print("audio.shape2", audio.shape)
#     onnx_session = onnxruntime.InferenceSession(
#         "wencoder_sim.onnx",
#         providers=["CPUExecutionProvider"],
#         provider_options=[
#             {
#                 "intra_op_num_threads": 8,
#                 "execution_mode": onnxruntime.ExecutionMode.ORT_PARALLEL,
#                 "inter_op_num_threads": 8,
#             }
#         ],
#     )
#     for i in range(5):
#         with Timer2("mainProcess timer", True) as t:
#             mel = log_mel_spectrogram(audio).to("cpu").unsqueeze(0)
#             # mel = mel[:, :, 1500:]
#             mel = mel[:, :, 2500:]
#             # mel[0, 79, 1499] = 0.1
#             print("x.shape", mel.shape)
#             onnx_res = onnx_session.run(
#                 ["ppg"],
#                 {
#                     "mel": mel.cpu().numpy(),
#                 },
#             )
#             t.record("fin")
#             print("onnx_res", onnx_res)
# def export_wrapped_encoder(wavPath, ppgPath):
#     print("pred")
#     whisper = LightWhisper("tiny.pt")
#     audio = load_audio(wavPath)
#     # audln = audio.shape[0]
#     # ppgln = audln // 320
#     print("audio.shape1", audio.shape, audio.shape[0] / 16000)
#     audio = pad_or_trim(audio)
#     print("audio.shape2", audio.shape)
#     mel = log_mel_spectrogram(audio).to("cpu").unsqueeze(0)
#     mel = mel[:, :, 1500:]
#     input_names = ["mel"]
#     output_names = ["ppg"]
#     torch.onnx.export(
#         whisper,
#         (mel,),
#         "wencoder.onnx",
#         dynamic_axes={
#             "mel": [2],
#         },
#         do_constant_folding=True,
#         opset_version=17,
#         verbose=False,
#         input_names=input_names,
#         output_names=output_names,
#     )
#     metadata = {
#         "application": "VC_CLIENT",
#         "version": "2.1",
#     }
#     model_onnx2 = onnx.load("wencoder.onnx")
#     model_simp, check = simplify(model_onnx2)
#     meta = model_simp.metadata_props.add()
#     meta.key = "metadata"
#     meta.value = json.dumps(metadata)
#     onnx.save(model_simp, "wencoder_sim.onnx")
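The commented-out functions above preserve the encoder-to-ONNX export experiments. A condensed, hedged sketch of that flow (the dummy shape, file name, and opset are taken from the dead code, not new APIs):

import torch

def export_encoder_onnx(encoder: torch.nn.Module, out_path: str = "wencoder.onnx") -> None:
    dummy_mel = torch.zeros(1, 80, 3000)  # (batch, mel bins, frames)
    torch.onnx.export(
        encoder,
        (dummy_mel,),
        out_path,
        input_names=["mel"],
        output_names=["ppg"],
        dynamic_axes={"mel": [2]},  # frame axis stays variable-length
        opset_version=17,
    )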

View File

@@ -0,0 +1,46 @@
import torch
import numpy as np

from const import EnumInferenceTypes
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer


class EasyVCInferencerONNX(OnnxRVCInferencer):
    def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
        super().loadModel(file, gpu, inferencerTypeVersion)
        self.setProps(EnumInferenceTypes.easyVC, file, self.isHalf, gpu)
        return self

    def infer(
        self,
        feats: torch.Tensor,
        pitch_length: torch.Tensor,
        pitch: torch.Tensor | None,
        pitchf: torch.Tensor | None,
        sid: torch.Tensor,
        convert_length: int | None,
    ) -> torch.Tensor:
        if self.isHalf:
            audio1 = self.model.run(
                ["audio"],
                {
                    "feats": feats.cpu().numpy().astype(np.float16),
                    "p_len": pitch_length.cpu().numpy().astype(np.int64),
                },
            )
        else:
            audio1 = self.model.run(
                ["audio"],
                {
                    "feats": feats.cpu().numpy().astype(np.float32),
                    "p_len": pitch_length.cpu().numpy().astype(np.int64),
                },
            )

        res = audio1[0][0][0]
        # if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
        #     res = audio1[0]
        # else:
        #     res = np.array(audio1)[0][0, 0]
        # res = np.clip(res, -1.0, 1.0)
        return torch.tensor(res)
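Call-side sketch (model path, gpu id, and tensor shapes are assumptions, not from this diff); note that `pitch`, `pitchf`, and `sid` are accepted for interface compatibility but never fed to the ONNX graph:

import torch

inferencer = EasyVCInferencerONNX().loadModel("easy_vc.onnx", gpu=-1)
feats = torch.zeros(1, 100, 256)  # (batch, frames, feature dim) - dims assumed
audio = inferencer.infer(feats, torch.tensor([100]), None, None, torch.tensor([0]), None)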

View File

@@ -1,4 +1,5 @@
from const import EnumInferenceTypes
from voice_changer.RVC.inferencer.EasyVCInferencerONNX import EasyVCInferencerONNX
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
@@ -42,6 +43,7 @@ class InferencerManager:
        elif inferencerType == EnumInferenceTypes.pyTorchVoRASbeta or inferencerType == EnumInferenceTypes.pyTorchVoRASbeta.value:
            if sys.platform.startswith("darwin") is False:
                from voice_changer.RVC.inferencer.VorasInferencebeta import VoRASInferencer

                return VoRASInferencer().loadModel(file, gpu)
            else:
                raise RuntimeError("[Voice Changer] VoRAS is not supported on macOS")
@@ -55,5 +57,8 @@ class InferencerManager:
            return OnnxRVCInferencer().loadModel(file, gpu, inferencerTypeVersion)
        elif inferencerType == EnumInferenceTypes.onnxRVCNono or inferencerType == EnumInferenceTypes.onnxRVCNono.value:
            return OnnxRVCInferencerNono().loadModel(file, gpu, inferencerTypeVersion)
        elif inferencerType == EnumInferenceTypes.easyVC or inferencerType == EnumInferenceTypes.easyVC.value:
            return EasyVCInferencerONNX().loadModel(file, gpu)
        else:
            raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)

View File

@@ -206,6 +206,12 @@ class VoiceChangerManager(ServerDeviceCallbacks):
            slotInfo = LLVCModelSlotGenerator.loadModel(params)
            self.modelSlotManager.save_model_slot(params.slot, slotInfo)
        elif params.voiceChangerType == "EasyVC":
            from voice_changer.EasyVC.EasyVCModelSlotGenerator import EasyVCModelSlotGenerator

            slotInfo = EasyVCModelSlotGenerator.loadModel(params)
            self.modelSlotManager.save_model_slot(params.slot, slotInfo)

        logger.info(f"params, {params}")

    def get_info(self):
@@ -307,6 +313,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
            self.voiceChanger.setModel(self.voiceChangerModel)
            pass
        elif slotInfo.voiceChangerType == "EasyVC":
            logger.info("................EasyVC")
            from voice_changer.EasyVC.EasyVC import EasyVC

            self.voiceChangerModel = EasyVC(self.params, slotInfo)
            self.voiceChanger = VoiceChangerV2(self.params)
            self.voiceChanger.setModel(self.voiceChangerModel)
            pass
        else:
            logger.info(f"[Voice Changer] unknown voice changer model: {slotInfo.voiceChangerType}")
            if hasattr(self, "voiceChangerModel"):
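The EasyVC branches follow the same lazy-import pattern as the other backends, keeping optional dependencies off the startup path until a slot actually needs them. A stripped-down sketch of the pattern (the function name is illustrative):

def load_slot_generator(voice_changer_type: str):
    if voice_changer_type == "EasyVC":
        # imported inside the branch so unused backends never load
        from voice_changer.EasyVC.EasyVCModelSlotGenerator import EasyVCModelSlotGenerator
        return EasyVCModelSlotGenerator
    raise RuntimeError(f"[Voice Changer] unknown voice changer model: {voice_changer_type}")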

View File

@@ -216,8 +216,8 @@ class VoiceChangerV2(VoiceChangerIF):
        try:
            if self.voiceChanger is None:
                raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")

            with Timer2("main-process", False) as t:
            enableMainprocessTimer = False
            with Timer2("main-process", enableMainprocessTimer) as t:
                processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()

                if self.noCrossFade:  # Beatrice, LLVC
@@ -234,12 +234,14 @@ class VoiceChangerV2(VoiceChangerIF):
                    block_frame = receivedData.shape[0]
                    crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
                    self._generate_strength(crossfade_frame)
                    t.record("generate_strength")

                    audio = self.voiceChanger.inference(
                        receivedData,
                        crossfade_frame=crossfade_frame,
                        sola_search_frame=sola_search_frame,
                    )
                    t.record("inference")

                    if hasattr(self, "sola_buffer") is True:
                        np.set_printoptions(threshold=10000)
@@ -271,6 +273,8 @@ class VoiceChangerV2(VoiceChangerIF):
                        logger.info("[Voice Changer] warming up... generating sola buffer.")
                        result = np.zeros(4096).astype(np.int16)

                    t.record("sola")

                    if hasattr(self, "sola_buffer") is True and sola_offset < sola_search_frame:
                        offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
                        end = -1 * (sola_search_frame - sola_offset)
@@ -280,6 +284,8 @@ class VoiceChangerV2(VoiceChangerIF):
                        self.sola_buffer = audio[-crossfade_frame:] * self.np_prev_strength
                        # self.sola_buffer = audio[- crossfade_frame:]

                    t.record("post")

                mainprocess_time = t.secs

            # post-processing
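The new `t.record(...)` checkpoints rely on Timer2 acting as a context manager with named laps and a `.secs` total. A self-contained stand-in with the same shape (illustration only, not the project's Timer2):

import time

class MiniTimer:
    def __init__(self, title: str, enable: bool):
        self.title, self.enable, self.secs = title, enable, 0.0

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.secs = time.perf_counter() - self.start

    def record(self, label: str):
        if self.enable:  # mirrors the enableMainprocessTimer gating above
            print(self.title, label, time.perf_counter() - self.start)

with MiniTimer("main-process", enable=False) as t:
    t.record("inference")
mainprocess_time = t.secs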

View File

@@ -22,6 +22,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
    "beatriceModel",
    "llvcModel",
    "llvcConfig",
    "easyVCModel",
]
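With "easyVCModel" in the Literal, type checkers accept the new kind anywhere a `LoadModelParamFileKind` is expected and reject anything else. Abbreviated sketch (the real alias lists every kind):

from typing import Literal, TypeAlias

FileKind: TypeAlias = Literal["llvcModel", "llvcConfig", "easyVCModel"]

def upload(kind: FileKind) -> None: ...

upload("easyVCModel")  # ok
# upload("easyVcModel")  # rejected by a type checker - literals are case-sensitive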

View File

@@ -57,7 +57,7 @@ class Timer2(object):
        if self.enable is False:
            return

        self.maxStores = 10
        self.maxStores = 1

        current_frame = inspect.currentframe()
        caller_frame = inspect.getouterframes(current_frame, 2)
View File

@@ -16,3 +16,4 @@ class VoiceChangerParams:
    crepe_onnx_tiny: str
    rmvpe: str
    rmvpe_onnx: str
    whisper_tiny: str
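`whisper_tiny` joins the other weight paths as a plain dataclass field, so every construction site must now supply it. Sketch with an abbreviated field set (paths illustrative):

from dataclasses import dataclass

@dataclass
class ParamsSketch:  # stand-in for VoiceChangerParams
    rmvpe_onnx: str
    whisper_tiny: str

params = ParamsSketch(rmvpe_onnx="pretrain/rmvpe.onnx", whisper_tiny="pretrain/whisper_tiny.pt")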