w-okada 2024-02-28 23:08:49 +09:00
parent 39e0d0cfd6
commit bc6e8a9c08
35 changed files with 3520 additions and 1896 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@@ -28,41 +28,41 @@
"author": "wataru.okada@flect.co.jp",
"license": "ISC",
"devDependencies": {
"@babel/core": "^7.23.5",
"@babel/plugin-transform-runtime": "^7.23.4",
"@babel/preset-env": "^7.23.5",
"@babel/core": "^7.24.0",
"@babel/plugin-transform-runtime": "^7.24.0",
"@babel/preset-env": "^7.24.0",
"@babel/preset-react": "^7.23.3",
"@babel/preset-typescript": "^7.23.3",
"@types/node": "^20.10.2",
"@types/react": "^18.2.40",
"@types/react-dom": "^18.2.17",
"autoprefixer": "^10.4.16",
"@types/node": "^20.11.21",
"@types/react": "^18.2.60",
"@types/react-dom": "^18.2.19",
"autoprefixer": "^10.4.17",
"babel-loader": "^9.1.3",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1",
"eslint": "^8.55.0",
"copy-webpack-plugin": "^12.0.2",
"css-loader": "^6.10.0",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.0.1",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"html-loader": "^4.2.0",
"html-webpack-plugin": "^5.5.3",
"html-loader": "^5.0.0",
"html-webpack-plugin": "^5.6.0",
"npm-run-all": "^4.1.5",
"postcss-loader": "^7.3.3",
"postcss-loader": "^8.1.1",
"postcss-nested": "^6.0.1",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"rimraf": "^5.0.5",
"style-loader": "^3.3.3",
"style-loader": "^3.3.4",
"ts-loader": "^9.5.1",
"tsconfig-paths": "^4.2.0",
"typescript": "^5.3.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
},
"dependencies": {
"@alexanderolsen/libsamplerate-js": "^2.1.0",
"@dannadori/voice-changer-client-js": "^1.0.180",
"@alexanderolsen/libsamplerate-js": "^2.1.1",
"@dannadori/voice-changer-client-js": "^1.0.182",
"@dannadori/voice-changer-js": "^1.0.2",
"@dannadori/worker-manager": "^1.0.20",
"@fortawesome/fontawesome-svg-core": "^6.5.1",
@@ -70,9 +70,9 @@
"@fortawesome/free-regular-svg-icons": "^6.5.1",
"@fortawesome/free-solid-svg-icons": "^6.5.1",
"@fortawesome/react-fontawesome": "^0.2.0",
"@tensorflow/tfjs": "^4.14.0",
"onnxruntime-web": "^1.16.3",
"protobufjs": "^7.2.5",
"@tensorflow/tfjs": "^4.17.0",
"onnxruntime-web": "^1.17.1",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0"
}

View File

@@ -125,6 +125,11 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
return x.kind == "llvcConfig";
});
return enough;
} else if (setting.voiceChangerType == "EasyVC") {
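// EasyVC needs only a single model file, uploaded with kind "easyVCModel".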
const enough = !!setting.files.find((x) => {
return x.kind == "easyVCModel";
});
return enough;
}
return false;
};
@@ -189,6 +194,8 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
} else if (vcType == "LLVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "llvcModel", ["pth"]));
rows.push(generateFileRow(uploadSetting!, "Config", "llvcConfig", ["json"]));
} else if (vcType == "EasyVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "easyVCModel", ["onnx"]));
}
return rows;
};

View File

@@ -1,44 +1,44 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.180",
"version": "1.0.182",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.180",
"version": "1.0.182",
"license": "ISC",
"dependencies": {
"@types/readable-stream": "^4.0.9",
"amazon-chime-sdk-js": "^3.18.2",
"@types/readable-stream": "^4.0.10",
"amazon-chime-sdk-js": "^3.20.0",
"buffer": "^6.0.3",
"localforage": "^1.10.0",
"protobufjs": "^7.2.5",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"socket.io-client": "^4.7.2"
"socket.io-client": "^4.7.4"
},
"devDependencies": {
"@types/audioworklet": "^0.0.52",
"@types/jest": "^29.5.10",
"@types/node": "^20.10.2",
"@types/react": "18.2.40",
"@types/react-dom": "18.2.17",
"eslint": "^8.55.0",
"@types/audioworklet": "^0.0.54",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.21",
"@types/react": "18.2.60",
"@types/react-dom": "18.2.19",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.0.1",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"jest": "^29.7.0",
"npm-run-all": "^4.1.5",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"raw-loader": "^4.0.2",
"rimraf": "^5.0.5",
"ts-loader": "^9.5.1",
"typescript": "^5.3.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
}
},
"node_modules/@aashutoshrathi/word-wrap": {
@@ -1585,22 +1585,22 @@
}
},
"node_modules/@eslint/js": {
"version": "8.55.0",
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.55.0.tgz",
"integrity": "sha512-qQfo2mxH5yVom1kacMtZZJFVdW+E70mqHMJvVg6WTLo+VBuQJ4TojZlfWBjK0ve5BdEeNAVxOsl/nvNMpJOaJA==",
"version": "8.57.0",
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.57.0.tgz",
"integrity": "sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g==",
"dev": true,
"engines": {
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
}
},
"node_modules/@humanwhocodes/config-array": {
"version": "0.11.13",
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.13.tgz",
"integrity": "sha512-JSBDMiDKSzQVngfRjOdFXgFfklaXI4K9nLF49Auh21lmBWRLIK3+xTErTWD4KU54pb6coM6ESE7Awz/FNU3zgQ==",
"version": "0.11.14",
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.14.tgz",
"integrity": "sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg==",
"dev": true,
"dependencies": {
"@humanwhocodes/object-schema": "^2.0.1",
"debug": "^4.1.1",
"@humanwhocodes/object-schema": "^2.0.2",
"debug": "^4.3.1",
"minimatch": "^3.0.5"
},
"engines": {
@@ -1621,9 +1621,9 @@
}
},
"node_modules/@humanwhocodes/object-schema": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.1.tgz",
"integrity": "sha512-dvuCeX5fC9dXgJn9t+X5atfmgQAzUOWqS1254Gh0m6i8wKd10ebXkfNKiRK+1GWi/yTvvLDHpoxLr0xxxeslWw==",
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.2.tgz",
"integrity": "sha512-6EwiSjwWYP7pTckG6I5eyFANjPhmPjUX9JRLUSfNPC7FX7zK9gyZAfUEaECL6ALTpGX5AjnBq3C9XmVWPitNpw==",
"dev": true
},
"node_modules/@isaacs/cliui": {
@@ -3051,9 +3051,9 @@
"integrity": "sha512-+9jVqKhRSpsc591z5vX+X5Yyw+he/HCB4iQ/RYxw35CEPaY1gnsNE43nf9n9AaYjAQrTiI/mOwKUKdUs9vf7Xg=="
},
"node_modules/@types/audioworklet": {
"version": "0.0.52",
"resolved": "https://registry.npmjs.org/@types/audioworklet/-/audioworklet-0.0.52.tgz",
"integrity": "sha512-+C0QA8HJS07NjSdLFUDsSfUGJiLs+FPa6K7Tu/e76dqHEnuTOjAjDyiFOnZTuf9j4x9P8Nmv0OOfcMNYnGzbAQ==",
"version": "0.0.54",
"resolved": "https://registry.npmjs.org/@types/audioworklet/-/audioworklet-0.0.54.tgz",
"integrity": "sha512-WR1XcwT2LhCaUiKDDgHdTjrVjoBZnTz6FhszeIKgY9i2UYfIRKtnNvqToUDnbCPXBpVuu4Qo5+mMJt+wDphRew==",
"dev": true
},
"node_modules/@types/babel__core": {
@@ -3234,9 +3234,9 @@
}
},
"node_modules/@types/jest": {
"version": "29.5.10",
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.10.tgz",
"integrity": "sha512-tE4yxKEphEyxj9s4inideLHktW/x6DwesIwWZ9NN1FKf9zbJYsnhBoA9vrHA/IuIOKwPa5PcFBNV4lpMIOEzyQ==",
"version": "29.5.12",
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
"dev": true,
"dependencies": {
"expect": "^29.0.0",
@@ -3256,9 +3256,9 @@
"dev": true
},
"node_modules/@types/node": {
"version": "20.10.2",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.2.tgz",
"integrity": "sha512-37MXfxkb0vuIlRKHNxwCkb60PNBpR94u4efQuN4JgIAm66zfCDXGSAFCef9XUWFovX2R1ok6Z7MHhtdVXXkkIw==",
"version": "20.11.21",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.21.tgz",
"integrity": "sha512-/ySDLGscFPNasfqStUuWWPfL78jompfIoVzLJPVVAHBh6rpG68+pI2Gk+fNLeI8/f1yPYL4s46EleVIc20F1Ow==",
"dependencies": {
"undici-types": "~5.26.4"
}
@@ -3291,9 +3291,9 @@
"dev": true
},
"node_modules/@types/react": {
"version": "18.2.40",
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.2.40.tgz",
"integrity": "sha512-H+BUhb9C1zBtogDLAk+KCNRKiHDrqSwQT/0z0PVTwMFBxqg3011ByLomADtgkgMkfwj4AMOiXBReyLTUBg681g==",
"version": "18.2.60",
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.2.60.tgz",
"integrity": "sha512-dfiPj9+k20jJrLGOu9Nf6eqxm2EyJRrq2NvwOFsfbb7sFExZ9WELPs67UImHj3Ayxg8ruTtKtNnbjaF8olPq0A==",
"dev": true,
"dependencies": {
"@types/prop-types": "*",
@@ -3302,27 +3302,27 @@
}
},
"node_modules/@types/react-dom": {
"version": "18.2.17",
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.2.17.tgz",
"integrity": "sha512-rvrT/M7Df5eykWFxn6MYt5Pem/Dbyc1N8Y0S9Mrkw2WFCRiqUgw9P7ul2NpwsXCSM1DVdENzdG9J5SreqfAIWg==",
"version": "18.2.19",
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.2.19.tgz",
"integrity": "sha512-aZvQL6uUbIJpjZk4U8JZGbau9KDeAwMfmhyWorxgBkqDIEf6ROjRozcmPIicqsUwPUjbkDfHKgGee1Lq65APcA==",
"dev": true,
"dependencies": {
"@types/react": "*"
}
},
"node_modules/@types/readable-stream": {
"version": "4.0.9",
"resolved": "https://registry.npmjs.org/@types/readable-stream/-/readable-stream-4.0.9.tgz",
"integrity": "sha512-4cwuvrmNF96M4Nrx0Eep37RwPB1Mth+nCSezsGRv5+PsFyRvDdLd0pil6gVLcWD/bh69INNdwZ98dJwfHpLohA==",
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/@types/readable-stream/-/readable-stream-4.0.10.tgz",
"integrity": "sha512-AbUKBjcC8SHmImNi4yK2bbjogQlkFSg7shZCcicxPQapniOlajG8GCc39lvXzCWX4lLRRs7DM3VAeSlqmEVZUA==",
"dependencies": {
"@types/node": "*",
"safe-buffer": "~5.1.1"
}
},
"node_modules/@types/retry": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
"integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==",
"version": "0.12.2",
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.2.tgz",
"integrity": "sha512-XISRgDJ2Tc5q4TRqvgJtzsRkFYNJzZrhTdtMoGVBttwzzQJkPnS3WWTFc7kuDRoPtPakl+T+OfdEUjYJj7Jbow==",
"dev": true
},
"node_modules/@types/scheduler": {
@@ -3721,9 +3721,9 @@
}
},
"node_modules/amazon-chime-sdk-js": {
"version": "3.18.2",
"resolved": "https://registry.npmjs.org/amazon-chime-sdk-js/-/amazon-chime-sdk-js-3.18.2.tgz",
"integrity": "sha512-w0O/X8NG+i7y6hS+iQOH0Yn1szkfFDyJDrFTtuZ81Ygd6832Ht1MLmNFf5HuaQzhqVve48W/fAtfcRYOcavIeg==",
"version": "3.20.0",
"resolved": "https://registry.npmjs.org/amazon-chime-sdk-js/-/amazon-chime-sdk-js-3.20.0.tgz",
"integrity": "sha512-VfWKwFQEp78pO2kRvCHsQ9Df/cwk0F6OVzFHlJ8pqtKCw8NIeFJ0zM3Yv1vyfU6N35nXg41APld/sTfvLsotCA==",
"dependencies": {
"@aws-crypto/sha256-js": "^2.0.1",
"@aws-sdk/client-chime-sdk-messaging": "^3.341.0",
@@ -3736,8 +3736,8 @@
"ua-parser-js": "^1.0.1"
},
"engines": {
"node": "^12 || ^14 || ^15 || ^16 || ^18 || ^19",
"npm": "^6 || ^7 || ^8 || ^9"
"node": "^18 || ^19 || ^20",
"npm": "^8 || ^9 || ^10"
}
},
"node_modules/ansi-escapes": {
@@ -3835,12 +3835,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/array-flatten": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-2.1.2.tgz",
"integrity": "sha512-hNfzcOV8W4NdualtqBFPyVO+54DSJuZGY9qT4pRroB6S9e3iiido2ISIC5h9R2sPJ8H3FHCIiEnsv1lPXO3KtQ==",
"dev": true
},
"node_modules/array-includes": {
"version": "3.1.7",
"resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.7.tgz",
@@ -4165,13 +4159,11 @@
"dev": true
},
"node_modules/bonjour-service": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/bonjour-service/-/bonjour-service-1.1.1.tgz",
"integrity": "sha512-Z/5lQRMOG9k7W+FkeGTNjh7htqn/2LMnfOvBZ8pynNZCM9MwkQkI3zeI4oz09uWdcgmgHugVvBqxGg4VQJ5PCg==",
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/bonjour-service/-/bonjour-service-1.2.1.tgz",
"integrity": "sha512-oSzCS2zV14bh2kji6vNe7vrpJYCHGvcZnlffFQ1MEoX/WOeQ/teD8SYWKR942OI3INjq8OMNJlbPK5LLLUxFDw==",
"dev": true,
"dependencies": {
"array-flatten": "^2.1.2",
"dns-equal": "^1.0.0",
"fast-deep-equal": "^3.1.3",
"multicast-dns": "^7.2.5"
}
@@ -4387,16 +4379,10 @@
}
},
"node_modules/chokidar": {
"version": "3.5.3",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz",
"integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==",
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
"integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
"dev": true,
"funding": [
{
"type": "individual",
"url": "https://paulmillr.com/funding/"
}
],
"dependencies": {
"anymatch": "~3.1.2",
"braces": "~3.0.2",
@@ -4409,6 +4395,9 @@
"engines": {
"node": ">= 8.10.0"
},
"funding": {
"url": "https://paulmillr.com/funding/"
},
"optionalDependencies": {
"fsevents": "~2.3.2"
}
@@ -4990,12 +4979,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/dns-equal": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/dns-equal/-/dns-equal-1.0.0.tgz",
"integrity": "sha512-z+paD6YUQsk+AbGCEM4PrOXSss5gd66QfcVBFTKR/HpFL9jCqikS94HYwKww6fQyO7IxrIIyUu+g0Ka9tUS2Cg==",
"dev": true
},
"node_modules/dns-packet": {
"version": "5.6.1",
"resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-5.6.1.tgz",
@@ -5277,16 +5260,16 @@
}
},
"node_modules/eslint": {
"version": "8.55.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-8.55.0.tgz",
"integrity": "sha512-iyUUAM0PCKj5QpwGfmCAG9XXbZCWsqP/eWAWrG/W0umvjuLRBECwSFdt+rCntju0xEH7teIABPwXpahftIaTdA==",
"version": "8.57.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.0.tgz",
"integrity": "sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ==",
"dev": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.6.1",
"@eslint/eslintrc": "^2.1.4",
"@eslint/js": "8.55.0",
"@humanwhocodes/config-array": "^0.11.13",
"@eslint/js": "8.57.0",
"@humanwhocodes/config-array": "^0.11.14",
"@humanwhocodes/module-importer": "^1.0.1",
"@nodelib/fs.walk": "^1.2.8",
"@ungap/structured-clone": "^1.2.0",
@@ -5344,23 +5327,24 @@
}
},
"node_modules/eslint-plugin-prettier": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/eslint-plugin-prettier/-/eslint-plugin-prettier-5.0.1.tgz",
"integrity": "sha512-m3u5RnR56asrwV/lDC4GHorlW75DsFfmUcjfCYylTUs85dBRnB7VM6xG8eCMJdeDRnppzmxZVf1GEPJvl1JmNg==",
"version": "5.1.3",
"resolved": "https://registry.npmjs.org/eslint-plugin-prettier/-/eslint-plugin-prettier-5.1.3.tgz",
"integrity": "sha512-C9GCVAs4Eq7ZC/XFQHITLiHJxQngdtraXaM+LoUFoFp/lHNl2Zn8f3WQbe9HvTBBQ9YnKFB0/2Ajdqwo5D1EAw==",
"dev": true,
"dependencies": {
"prettier-linter-helpers": "^1.0.0",
"synckit": "^0.8.5"
"synckit": "^0.8.6"
},
"engines": {
"node": "^14.18.0 || >=16.0.0"
},
"funding": {
"url": "https://opencollective.com/prettier"
"url": "https://opencollective.com/eslint-plugin-prettier"
},
"peerDependencies": {
"@types/eslint": ">=8.0.0",
"eslint": ">=8.0.0",
"eslint-config-prettier": "*",
"prettier": ">=3.0.0"
},
"peerDependenciesMeta": {
@@ -5997,12 +5981,6 @@
"node": ">= 0.6"
}
},
"node_modules/fs-monkey": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/fs-monkey/-/fs-monkey-1.0.5.tgz",
"integrity": "sha512-8uMbBjrhzW76TYgEV27Y5E//W2f/lTFmx78P2w19FZSxarhI/798APGQyuGCwmkNxgwGRhrLfvWyLBvNtuOmew==",
"dev": true
},
"node_modules/fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@@ -6824,6 +6802,18 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/is-network-error": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/is-network-error/-/is-network-error-1.0.1.tgz",
"integrity": "sha512-OwQXkwBJeESyhFw+OumbJVD58BFBJJI5OM5S1+eyrDKlgDZPX2XNT5gXS56GSD3NPbbwUuMlR1Q71SRp5SobuQ==",
"dev": true,
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-number": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
@@ -8132,17 +8122,27 @@
}
},
"node_modules/memfs": {
"version": "3.5.3",
"resolved": "https://registry.npmjs.org/memfs/-/memfs-3.5.3.tgz",
"integrity": "sha512-UERzLsxzllchadvbPs5aolHh65ISpKpM+ccLbOJ8/vvpBKmAWf+la7dXFy7Mr0ySHbdHrFv5kGFCUHHe6GFEmw==",
"version": "4.7.7",
"resolved": "https://registry.npmjs.org/memfs/-/memfs-4.7.7.tgz",
"integrity": "sha512-x9qc6k88J/VVwnfTkJV8pRRswJ2156Rc4w5rciRqKceFDZ0y1MqsNL9pkg5sE0GOcDzZYbonreALhaHzg1siFw==",
"dev": true,
"dependencies": {
"fs-monkey": "^1.0.4"
"tslib": "^2.0.0"
},
"engines": {
"node": ">= 4.0.0"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/streamich"
}
},
"node_modules/memfs/node_modules/tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==",
"dev": true
},
"node_modules/memorystream": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/memorystream/-/memorystream-0.3.1.tgz",
@@ -8775,16 +8775,20 @@
}
},
"node_modules/p-retry": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
"integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-6.2.0.tgz",
"integrity": "sha512-JA6nkq6hKyWLLasXQXUrO4z8BUZGUt/LjlJxx8Gb2+2ntodU/SS63YZ8b0LUTbQ8ZB9iwOfhEPhg4ykKnn2KsA==",
"dev": true,
"dependencies": {
"@types/retry": "0.12.0",
"@types/retry": "0.12.2",
"is-network-error": "^1.0.0",
"retry": "^0.13.1"
},
"engines": {
"node": ">=8"
"node": ">=16.17"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-try": {
@@ -9038,9 +9042,9 @@
}
},
"node_modules/prettier": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.1.0.tgz",
"integrity": "sha512-TQLvXjq5IAibjh8EpBIkNKxO749UEWABoiIZehEPiY4GNpVdhaFKqSTu+QrlU6D2dPAfubRmtJTi4K4YkQ5eXw==",
"version": "3.2.5",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.2.5.tgz",
"integrity": "sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A==",
"dev": true,
"bin": {
"prettier": "bin/prettier.cjs"
@@ -9127,9 +9131,9 @@
"dev": true
},
"node_modules/protobufjs": {
"version": "7.2.5",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
"integrity": "sha512-gGXRSXvxQ7UiPgfw8gevrfRWcTlSbOFg+p/N+JVJEK5VhueL2miT6qTymqAmjr1Q5WbOCyJbyrk6JfWKwlFn6A==",
"version": "7.2.6",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.6.tgz",
"integrity": "sha512-dgJaEDDL6x8ASUZ1YqWciTRrdOuYNzoOf27oHNfdyvKqHr5i0FV7FSLU+aIeFjyFgVxrpTOtQUi0BLLBymZaBw==",
"hasInstallScript": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
@@ -9830,9 +9834,9 @@
"dev": true
},
"node_modules/serialize-javascript": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.1.tgz",
"integrity": "sha512-owoXEFjWRllis8/M1Q+Cw5k8ZH40e3zhp/ovX+Xr/vi1qj6QesbyXXViFbpNvWvPNAD62SutwEXavefrLJWj7w==",
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz",
"integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==",
"dev": true,
"dependencies": {
"randombytes": "^2.1.0"
@@ -10044,9 +10048,9 @@
}
},
"node_modules/socket.io-client": {
"version": "4.7.2",
"resolved": "https://registry.npmjs.org/socket.io-client/-/socket.io-client-4.7.2.tgz",
"integrity": "sha512-vtA0uD4ibrYD793SOIAwlo8cj6haOeMHrGvwPxJsxH7CeIksqJ+3Zc06RvWTIFgiSqx4A3sOnTXpfAEE2Zyz6w==",
"version": "4.7.4",
"resolved": "https://registry.npmjs.org/socket.io-client/-/socket.io-client-4.7.4.tgz",
"integrity": "sha512-wh+OkeF0rAVCrABWQBaEjLfb7DVPotMbu0cgWgyR0v6eA4EoVnAwcIeIbcdTE3GT/H3kbdLl7OoH2+asoDRIIg==",
"dependencies": {
"@socket.io/component-emitter": "~3.1.0",
"debug": "~4.3.2",
@@ -10466,9 +10470,9 @@
}
},
"node_modules/terser": {
"version": "5.24.0",
"resolved": "https://registry.npmjs.org/terser/-/terser-5.24.0.tgz",
"integrity": "sha512-ZpGR4Hy3+wBEzVEnHvstMvqpD/nABNelQn/z2r0fjVWGQsN3bpOLzQlqDxmb4CDZnXq5lpjnQ+mHQLAOpfM5iw==",
"version": "5.28.1",
"resolved": "https://registry.npmjs.org/terser/-/terser-5.28.1.tgz",
"integrity": "sha512-wM+bZp54v/E9eRRGXb5ZFDvinrJIOaTapx3WUokyVGZu5ucVCK55zEgGd5Dl2fSr3jUo5sDiERErUWLY6QPFyA==",
"dev": true,
"dependencies": {
"@jridgewell/source-map": "^0.3.3",
@@ -10484,16 +10488,16 @@
}
},
"node_modules/terser-webpack-plugin": {
"version": "5.3.9",
"resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.9.tgz",
"integrity": "sha512-ZuXsqE07EcggTWQjXUj+Aot/OMcD0bMKGgF63f7UxYcu5/AJF53aIpK1YoP5xR9l6s/Hy2b+t1AM0bLNPRuhwA==",
"version": "5.3.10",
"resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.10.tgz",
"integrity": "sha512-BKFPWlPDndPs+NGGCr1U59t0XScL5317Y0UReNrHaw9/FwhPENlq6bfgs+4yPfyP51vqC1bQ4rp1EfXW5ZSH9w==",
"dev": true,
"dependencies": {
"@jridgewell/trace-mapping": "^0.3.17",
"@jridgewell/trace-mapping": "^0.3.20",
"jest-worker": "^27.4.5",
"schema-utils": "^3.1.1",
"serialize-javascript": "^6.0.1",
"terser": "^5.16.8"
"terser": "^5.26.0"
},
"engines": {
"node": ">= 10.13.0"
@@ -10827,9 +10831,9 @@
}
},
"node_modules/typescript": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.2.tgz",
"integrity": "sha512-6l+RyNy7oAHDfxC4FzSJcz9vnjTKxrLpDG5M2Vu4SHRVNg6xzqZp6LYSR9zjqQTu8DU/f5xwxUdADOkbrIX2gQ==",
"version": "5.3.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.3.tgz",
"integrity": "sha512-pXWcraxM0uxAS+tN0AG/BF2TyqmHO014Z070UsJ+pFvYuRSq8KH8DmWpnbXe0pEPDHXZV3FcAbJkijJ5oNEnWw==",
"dev": true,
"bin": {
"tsc": "bin/tsc",
@@ -11026,19 +11030,19 @@
}
},
"node_modules/webpack": {
"version": "5.89.0",
"resolved": "https://registry.npmjs.org/webpack/-/webpack-5.89.0.tgz",
"integrity": "sha512-qyfIC10pOr70V+jkmud8tMfajraGCZMBWJtrmuBymQKCrLTRejBI8STDp1MCyZu/QTdZSeacCQYpYNQVOzX5kw==",
"version": "5.90.3",
"resolved": "https://registry.npmjs.org/webpack/-/webpack-5.90.3.tgz",
"integrity": "sha512-h6uDYlWCctQRuXBs1oYpVe6sFcWedl0dpcVaTf/YF67J9bKvwJajFulMVSYKHrksMB3I/pIagRzDxwxkebuzKA==",
"dev": true,
"dependencies": {
"@types/eslint-scope": "^3.7.3",
"@types/estree": "^1.0.0",
"@types/estree": "^1.0.5",
"@webassemblyjs/ast": "^1.11.5",
"@webassemblyjs/wasm-edit": "^1.11.5",
"@webassemblyjs/wasm-parser": "^1.11.5",
"acorn": "^8.7.1",
"acorn-import-assertions": "^1.9.0",
"browserslist": "^4.14.5",
"browserslist": "^4.21.10",
"chrome-trace-event": "^1.0.2",
"enhanced-resolve": "^5.15.0",
"es-module-lexer": "^1.2.1",
@@ -11052,7 +11056,7 @@
"neo-async": "^2.6.2",
"schema-utils": "^3.2.0",
"tapable": "^2.1.1",
"terser-webpack-plugin": "^5.3.7",
"terser-webpack-plugin": "^5.3.10",
"watchpack": "^2.4.0",
"webpack-sources": "^3.2.3"
},
@@ -11127,77 +11131,82 @@
}
},
"node_modules/webpack-dev-middleware": {
"version": "5.3.3",
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz",
"integrity": "sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==",
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-7.0.0.tgz",
"integrity": "sha512-tZ5hqsWwww/8DislmrzXE3x+4f+v10H1z57mA2dWFrILb4i3xX+dPhTkcdR0DLyQztrhF2AUmO5nN085UYjd/Q==",
"dev": true,
"dependencies": {
"colorette": "^2.0.10",
"memfs": "^3.4.3",
"memfs": "^4.6.0",
"mime-types": "^2.1.31",
"range-parser": "^1.2.1",
"schema-utils": "^4.0.0"
},
"engines": {
"node": ">= 12.13.0"
"node": ">= 18.12.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/webpack"
},
"peerDependencies": {
"webpack": "^4.0.0 || ^5.0.0"
"webpack": "^5.0.0"
},
"peerDependenciesMeta": {
"webpack": {
"optional": true
}
}
},
"node_modules/webpack-dev-server": {
"version": "4.15.1",
"resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-4.15.1.tgz",
"integrity": "sha512-5hbAst3h3C3L8w6W4P96L5vaV0PxSmJhxZvWKYIdgxOQm8pNZ5dEOmmSLBVpP85ReeyRt6AS1QJNyo/oFFPeVA==",
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-5.0.2.tgz",
"integrity": "sha512-IVj3qsQhiLJR82zVg3QdPtngMD05CYP/Am+9NG5QSl+XwUR/UPtFwllRBKrMwM9ttzFsC6Zj3DMgniPyn/Z0hQ==",
"dev": true,
"dependencies": {
"@types/bonjour": "^3.5.9",
"@types/connect-history-api-fallback": "^1.3.5",
"@types/express": "^4.17.13",
"@types/serve-index": "^1.9.1",
"@types/serve-static": "^1.13.10",
"@types/sockjs": "^0.3.33",
"@types/ws": "^8.5.5",
"@types/bonjour": "^3.5.13",
"@types/connect-history-api-fallback": "^1.5.4",
"@types/express": "^4.17.21",
"@types/serve-index": "^1.9.4",
"@types/serve-static": "^1.15.5",
"@types/sockjs": "^0.3.36",
"@types/ws": "^8.5.10",
"ansi-html-community": "^0.0.8",
"bonjour-service": "^1.0.11",
"chokidar": "^3.5.3",
"bonjour-service": "^1.2.1",
"chokidar": "^3.6.0",
"colorette": "^2.0.10",
"compression": "^1.7.4",
"connect-history-api-fallback": "^2.0.0",
"default-gateway": "^6.0.3",
"express": "^4.17.3",
"graceful-fs": "^4.2.6",
"html-entities": "^2.3.2",
"html-entities": "^2.4.0",
"http-proxy-middleware": "^2.0.3",
"ipaddr.js": "^2.0.1",
"launch-editor": "^2.6.0",
"open": "^8.0.9",
"p-retry": "^4.5.0",
"rimraf": "^3.0.2",
"schema-utils": "^4.0.0",
"selfsigned": "^2.1.1",
"ipaddr.js": "^2.1.0",
"launch-editor": "^2.6.1",
"open": "^10.0.3",
"p-retry": "^6.2.0",
"rimraf": "^5.0.5",
"schema-utils": "^4.2.0",
"selfsigned": "^2.4.1",
"serve-index": "^1.9.1",
"sockjs": "^0.3.24",
"spdy": "^4.0.2",
"webpack-dev-middleware": "^5.3.1",
"ws": "^8.13.0"
"webpack-dev-middleware": "^7.0.0",
"ws": "^8.16.0"
},
"bin": {
"webpack-dev-server": "bin/webpack-dev-server.js"
},
"engines": {
"node": ">= 12.13.0"
"node": ">= 18.12.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/webpack"
},
"peerDependencies": {
"webpack": "^4.37.0 || ^5.0.0"
"webpack": "^5.0.0"
},
"peerDependenciesMeta": {
"webpack": {
@@ -11208,66 +11217,98 @@
}
}
},
"node_modules/webpack-dev-server/node_modules/define-lazy-prop": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz",
"integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==",
"node_modules/webpack-dev-server/node_modules/bundle-name": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bundle-name/-/bundle-name-4.1.0.tgz",
"integrity": "sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==",
"dev": true,
"dependencies": {
"run-applescript": "^7.0.0"
},
"engines": {
"node": ">=8"
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/is-docker": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz",
"integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==",
"node_modules/webpack-dev-server/node_modules/default-browser": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.2.1.tgz",
"integrity": "sha512-WY/3TUME0x3KPYdRRxEJJvXRHV4PyPoUsxtZa78lwItwRQRHhd2U9xOscaT/YTf8uCXIAjeJOFBVEh/7FtD8Xg==",
"dev": true,
"bin": {
"is-docker": "cli.js"
"dependencies": {
"bundle-name": "^4.1.0",
"default-browser-id": "^5.0.0"
},
"engines": {
"node": ">=8"
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/default-browser-id": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/default-browser-id/-/default-browser-id-5.0.0.tgz",
"integrity": "sha512-A6p/pu/6fyBcA1TRz/GqWYPViplrftcW2gZC9q79ngNCKAeR/X3gcEdXQHl4KNXV+3wgIJ1CPkJQ3IHM6lcsyA==",
"dev": true,
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/is-wsl": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-3.1.0.tgz",
"integrity": "sha512-UcVfVfaK4Sc4m7X3dUSoHoozQGBEFeDC+zVo06t98xe8CzHSZZBekNXH+tu0NalHolcJ/QAGqS46Hef7QXBIMw==",
"dev": true,
"dependencies": {
"is-inside-container": "^1.0.0"
},
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/open": {
"version": "8.4.2",
"resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz",
"integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==",
"version": "10.0.4",
"resolved": "https://registry.npmjs.org/open/-/open-10.0.4.tgz",
"integrity": "sha512-oujJ/FFr7ra6/7gJuQ4ZJJ8Gf2VHM0J3J/W7IvH++zaqEzacWVxzK++NiVY5NLHTTj7u/jNH5H3Ei9biL31Lng==",
"dev": true,
"dependencies": {
"define-lazy-prop": "^2.0.0",
"is-docker": "^2.1.1",
"is-wsl": "^2.2.0"
"default-browser": "^5.2.1",
"define-lazy-prop": "^3.0.0",
"is-inside-container": "^1.0.0",
"is-wsl": "^3.1.0"
},
"engines": {
"node": ">=12"
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/rimraf": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
"integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==",
"node_modules/webpack-dev-server/node_modules/run-applescript": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-7.0.0.tgz",
"integrity": "sha512-9by4Ij99JUr/MCFBUkDKLWK3G9HVXmabKz9U5MlIAIuvuzkiOicRYs8XJLxX+xahD+mLiiCYDqF9dKAgtzKP1A==",
"dev": true,
"dependencies": {
"glob": "^7.1.3"
},
"bin": {
"rimraf": "bin.js"
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/webpack-dev-server/node_modules/ws": {
"version": "8.14.2",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz",
"integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==",
"version": "8.16.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz",
"integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==",
"dev": true,
"engines": {
"node": ">=10.0.0"

View File

@@ -1,6 +1,6 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.180",
"version": "1.0.182",
"description": "",
"main": "dist/index.js",
"directories": {
@@ -26,35 +26,35 @@
"author": "wataru.okada@flect.co.jp",
"license": "ISC",
"devDependencies": {
"@types/audioworklet": "^0.0.52",
"@types/jest": "^29.5.10",
"@types/node": "^20.10.2",
"@types/react": "18.2.40",
"@types/react-dom": "18.2.17",
"eslint": "^8.55.0",
"@types/audioworklet": "^0.0.54",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.21",
"@types/react": "18.2.60",
"@types/react-dom": "18.2.19",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.0.1",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"jest": "^29.7.0",
"npm-run-all": "^4.1.5",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"raw-loader": "^4.0.2",
"rimraf": "^5.0.5",
"ts-loader": "^9.5.1",
"typescript": "^5.3.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
},
"dependencies": {
"@types/readable-stream": "^4.0.9",
"amazon-chime-sdk-js": "^3.18.2",
"@types/readable-stream": "^4.0.10",
"amazon-chime-sdk-js": "^3.20.0",
"buffer": "^6.0.3",
"localforage": "^1.10.0",
"protobufjs": "^7.2.5",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"socket.io-client": "^4.7.2"
"socket.io-client": "^4.7.4"
}
}

View File

@@ -13,8 +13,10 @@ export const VoiceChangerType = {
Beatrice: "Beatrice",
LLVC: "LLVC",
WebModel: "WebModel",
EasyVC: "EasyVC",
} as const;
export type VoiceChangerType = (typeof VoiceChangerType)[keyof typeof VoiceChangerType];
export type VoiceChangerType =
(typeof VoiceChangerType)[keyof typeof VoiceChangerType];
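// Illustrative usage of the const-object-as-enum pattern above: the object
// supplies the runtime values and the mapped type the compile-time union,
// e.g. const t: VoiceChangerType = VoiceChangerType.EasyVC; // type and value are both "EasyVC"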
export const StaticModel = {
BeatriceJVS: "Beatrice-JVS",
@@ -29,14 +31,16 @@ export const InputSampleRate = {
"44100": 44100,
"24000": 24000,
} as const;
export type InputSampleRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
export type InputSampleRate =
(typeof InputSampleRate)[keyof typeof InputSampleRate];
export const ModelSamplingRate = {
"48000": 48000,
"40000": 40000,
"32000": 32000,
} as const;
export type ModelSamplingRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
export type ModelSamplingRate =
(typeof InputSampleRate)[keyof typeof InputSampleRate];
export const CrossFadeOverlapSize = {
"128": 128,
@@ -46,7 +50,8 @@ export const CrossFadeOverlapSize = {
"2048": 2048,
"4096": 4096,
} as const;
export type CrossFadeOverlapSize = (typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
export type CrossFadeOverlapSize =
(typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
export const F0Detector = {
dio: "dio",
@@ -141,7 +146,8 @@ export const ServerSettingKey = {
inputSampleRate: "inputSampleRate",
enableDirectML: "enableDirectML",
} as const;
export type ServerSettingKey = (typeof ServerSettingKey)[keyof typeof ServerSettingKey];
export type ServerSettingKey =
(typeof ServerSettingKey)[keyof typeof ServerSettingKey];
export type VoiceChangerServerSetting = {
passThrough: boolean;
@@ -317,7 +323,16 @@ export type WebModelSlot = ModelSlot & {
samplingRate: number;
};
export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot | DiffusionSVCModelSlot | BeatriceModelSlot | LLVCModelSlot | WebModelSlot;
export type ModelSlotUnion =
| RVCModelSlot
| MMVCv13ModelSlot
| MMVCv15ModelSlot
| SoVitsSvc40ModelSlot
| DDSPSVCModelSlot
| DiffusionSVCModelSlot
| BeatriceModelSlot
| LLVCModelSlot
| WebModelSlot;
type ServerAudioDevice = {
kind: "audioinput" | "audiooutput";
@@ -471,13 +486,15 @@ export const SendingSampleRate = {
"44100": 44100,
"24000": 24000,
} as const;
export type SendingSampleRate = (typeof SendingSampleRate)[keyof typeof SendingSampleRate];
export type SendingSampleRate =
(typeof SendingSampleRate)[keyof typeof SendingSampleRate];
export const DownSamplingMode = {
decimate: "decimate",
average: "average",
} as const;
export type DownSamplingMode = (typeof DownSamplingMode)[keyof typeof DownSamplingMode];
export type DownSamplingMode =
(typeof DownSamplingMode)[keyof typeof DownSamplingMode];
export type WorkletNodeSetting = {
serverUrl: string;
@@ -552,9 +569,11 @@ export const VOICE_CHANGER_CLIENT_EXCEPTION = {
ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE",
ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED",
ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED: "ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED",
ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED:
"ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED",
} as const;
export type VOICE_CHANGER_CLIENT_EXCEPTION = (typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
export type VOICE_CHANGER_CLIENT_EXCEPTION =
(typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
////////////////////////////////////
// indexedDB
@@ -563,7 +582,8 @@ export const INDEXEDDB_DB_APP_NAME = "INDEXEDDB_KEY_VOICE_CHANGER";
export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB";
export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT";
export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER";
export const INDEXEDDB_KEY_MODEL_DATA = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA";
export const INDEXEDDB_KEY_MODEL_DATA =
"INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA";
// ONNX
export type OnnxExporterInfo = {

View File

@@ -1,11 +1,20 @@
import { useState, useMemo } from "react";
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, OnnxExporterInfo, MergeModelRequest, VoiceChangerType, DefaultServerSetting } from "../const";
import {
VoiceChangerServerSetting,
ServerInfo,
ServerSettingKey,
OnnxExporterInfo,
MergeModelRequest,
VoiceChangerType,
DefaultServerSetting,
} from "../const";
import { VoiceChangerClient } from "../VoiceChangerClient";
export const ModelAssetName = {
iconFile: "iconFile",
} as const;
export type ModelAssetName = (typeof ModelAssetName)[keyof typeof ModelAssetName];
export type ModelAssetName =
(typeof ModelAssetName)[keyof typeof ModelAssetName];
export const ModelFileKind = {
mmvcv13Config: "mmvcv13Config",
@@ -32,6 +41,8 @@ export const ModelFileKind = {
llvcModel: "llvcModel",
llvcConfig: "llvcConfig",
easyVCModel: "easyVCModel",
} as const;
export type ModelFileKind = (typeof ModelFileKind)[keyof typeof ModelFileKind];
@@ -80,12 +91,23 @@ export type ServerSettingState = {
getOnnx: () => Promise<OnnxExporterInfo>;
mergeModel: (request: MergeModelRequest) => Promise<ServerInfo>;
updateModelDefault: () => Promise<ServerInfo>;
updateModelInfo: (slot: number, key: string, val: string) => Promise<ServerInfo>;
uploadAssets: (slot: number, name: ModelAssetName, file: File) => Promise<void>;
updateModelInfo: (
slot: number,
key: string,
val: string
) => Promise<ServerInfo>;
uploadAssets: (
slot: number,
name: ModelAssetName,
file: File
) => Promise<void>;
};
export const useServerSetting = (props: UseServerSettingProps): ServerSettingState => {
const [serverSetting, _setServerSetting] = useState<ServerInfo>(DefaultServerSetting);
export const useServerSetting = (
props: UseServerSettingProps
): ServerSettingState => {
const [serverSetting, _setServerSetting] =
useState<ServerInfo>(DefaultServerSetting);
const setServerSetting = (info: ServerInfo) => {
if (!info.modelSlots) {
// When the server returns empty info (workaround for the web edition).
@@ -101,12 +123,17 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
return async (setting: ServerInfo) => {
if (!props.voiceChangerClient) return;
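// Diff each known setting key against the cached server state and push
// only the changed keys, one update call per key.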
for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
const k = Object.values(ServerSettingKey)[i] as keyof VoiceChangerServerSetting;
const k = Object.values(ServerSettingKey)[
i
] as keyof VoiceChangerServerSetting;
const cur_v = serverSetting[k];
const new_v = setting[k];
if (cur_v != new_v) {
const res = await props.voiceChangerClient.updateServerSettings(k, "" + new_v);
const res = await props.voiceChangerClient.updateServerSettings(
k,
"" + new_v
);
setServerSetting(res);
}
}
@@ -121,10 +148,21 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
// (e) Model upload
const _uploadFile2 = useMemo(() => {
return async (file: File, onprogress: (progress: number, end: boolean) => void, dir: string = "") => {
return async (
file: File,
onprogress: (progress: number, end: boolean) => void,
dir: string = ""
) => {
if (!props.voiceChangerClient) return;
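// Presumably uploadFile2 splits the file into chunks and returns the chunk
// count, which concatUploadedFile then uses to reassemble dir + file.name
// on the server (a reading based on the call shape below).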
const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress);
const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num);
const num = await props.voiceChangerClient.uploadFile2(
dir,
file,
onprogress
);
const res = await props.voiceChangerClient.concatUploadedFile(
dir + file.name,
num
);
console.log("uploaded", num, res);
};
}, [props.voiceChangerClient]);
@@ -159,7 +197,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
}),
};
const loadPromise = props.voiceChangerClient.loadModel(0, false, JSON.stringify(params));
const loadPromise = props.voiceChangerClient.loadModel(
0,
false,
JSON.stringify(params)
);
await loadPromise;
setUploadProgress(0);
@@ -180,7 +222,9 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
name,
file: file.name,
};
await props.voiceChangerClient.uploadAssets(JSON.stringify(assetUploadSetting));
await props.voiceChangerClient.uploadAssets(
JSON.stringify(assetUploadSetting)
);
reloadServerInfo();
};
}, [props.voiceChangerClient]);
@@ -209,7 +253,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
return serverInfo;
};
const updateModelInfo = async (slot: number, key: string, val: string) => {
const serverInfo = await props.voiceChangerClient!.updateModelInfo(slot, key, val);
const serverInfo = await props.voiceChangerClient!.updateModelInfo(
slot,
key,
val
);
setServerSetting(serverInfo);
return serverInfo;
};

View File

@@ -58,6 +58,7 @@ def setupArgParser():
parser.add_argument("--hubert_base", type=str, default="pretrain/hubert_base.pt", help="path to hubert_base model(pytorch)")
parser.add_argument("--hubert_base_jp", type=str, default="pretrain/rinna_hubert_base_jp.pt", help="path to hubert_base_jp model(pytorch)")
parser.add_argument("--hubert_soft", type=str, default="pretrain/hubert/hubert-soft-0d54a1f4.pt", help="path to hubert_soft model(pytorch)")
parser.add_argument("--whisper_tiny", type=str, default="pretrain/whisper_tiny.pt", help="path to hubert_soft model(pytorch)")
parser.add_argument("--nsf_hifigan", type=str, default="pretrain/nsf_hifigan/model", help="path to nsf_hifigan model(pytorch)")
parser.add_argument("--crepe_onnx_full", type=str, default="pretrain/crepe_onnx_full.onnx", help="path to crepe_onnx_full")
parser.add_argument("--crepe_onnx_tiny", type=str, default="pretrain/crepe_onnx_tiny.onnx", help="path to crepe_onnx_tiny")
@@ -106,6 +107,7 @@ voiceChangerParams = VoiceChangerParams(
rmvpe=args.rmvpe,
rmvpe_onnx=args.rmvpe_onnx,
sample_mode=args.sample_mode,
whisper_tiny=args.whisper_tiny,
)
vcparams = VoiceChangerParamsManager.get_instance()
vcparams.setParams(voiceChangerParams)

View File

@@ -14,6 +14,7 @@ VoiceChangerType: TypeAlias = Literal[
"Diffusion-SVC",
"Beatrice",
"LLVC",
"EasyVC",
]
StaticSlot: TypeAlias = Literal["Beatrice-JVS",]
@@ -56,7 +57,12 @@ def getFrontendPath():
return frontend_path
EmbedderType: TypeAlias = Literal["hubert_base", "contentvec", "hubert-base-japanese"]
EmbedderType: TypeAlias = Literal[
"hubert_base",
"contentvec",
"hubert-base-japanese",
"whisper",
]
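# "whisper" appears to back the new EasyVC path, which uses the whisper_tiny
# weight added elsewhere in this commit.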
class EnumInferenceTypes(Enum):
@@ -70,6 +76,8 @@ class EnumInferenceTypes(Enum):
onnxRVC = "onnxRVC"
onnxRVCNono = "onnxRVCNono"
easyVC = "easyVC"
DiffusionSVCInferenceType: TypeAlias = Literal["combo",]

View File

@@ -141,6 +141,14 @@ class LLVCModelSlot(ModelSlot):
configFile: str = ""
@dataclass
class EasyVCModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "EasyVC"
modelFile: str = ""
version: str = ""
samplingRate: int = -1
ModelSlots: TypeAlias = Union[
ModelSlot,
RVCModelSlot,
@@ -151,6 +159,7 @@ ModelSlots: TypeAlias = Union[
DiffusionSVCModelSlot,
BeatriceModelSlot,
LLVCModelSlot,
EasyVCModelSlot,
]
@@ -188,6 +197,9 @@ def loadSlotInfo(model_dir: str, slotIndex: int | StaticSlot) -> ModelSlots:
elif slotInfo.voiceChangerType == "LLVC":
slotInfoKey.extend(list(LLVCModelSlot.__annotations__.keys()))
return LLVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
elif slotInfo.voiceChangerType == "EasyVC":
slotInfoKey.extend(list(EasyVCModelSlot.__annotations__.keys()))
return EasyVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
else:
return ModelSlot()

View File

@@ -19,9 +19,19 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
crepe_onnx_tiny = voiceChangerParams.crepe_onnx_tiny
rmvpe = voiceChangerParams.rmvpe
rmvpe_onnx = voiceChangerParams.rmvpe_onnx
whisper_tiny = voiceChangerParams.whisper_tiny
weight_files = [content_vec_500_onnx, hubert_base, hubert_base_jp, hubert_soft,
nsf_hifigan, crepe_onnx_full, crepe_onnx_tiny, rmvpe]
weight_files = [
content_vec_500_onnx,
hubert_base,
hubert_base_jp,
hubert_soft,
nsf_hifigan,
crepe_onnx_full,
crepe_onnx_tiny,
rmvpe,
whisper_tiny,
]
# file exists check (currently only for rvc)
downloadParams = []
@@ -119,6 +129,15 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
}
)
if os.path.exists(whisper_tiny) is False:
downloadParams.append(
{
"url": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt",
"saveTo": whisper_tiny,
"position": 10,
}
)
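# The URL above is OpenAI's published Whisper "tiny" checkpoint; the long hex
# segment in the path is the checkpoint hash used by the official loader.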
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)

View File

@@ -0,0 +1,326 @@
"""
For VoiceChangerV2
"""
from dataclasses import asdict
import numpy as np
import torch
from data.ModelSlot import RVCModelSlot
from mods.log_control import VoiceChangaerLogger
from voice_changer.EasyVC.EasyVCSettings import EasyVCSettings
from voice_changer.EasyVC.pipeline.Pipeline import Pipeline
from voice_changer.EasyVC.pipeline.PipelineGenerator import createPipeline
from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.utils.Timer import Timer2
from voice_changer.utils.VoiceChangerModel import (
AudioInOut,
PitchfInOut,
FeatureInOut,
VoiceChangerModel,
)
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from Exceptions import (
DeviceCannotSupportHalfPrecisionException,
PipelineCreateException,
PipelineNotInitializedException,
)
import resampy
from typing import cast
logger = VoiceChangaerLogger.get_instance().getLogger()
class EasyVC(VoiceChangerModel):
def __init__(self, params: VoiceChangerParams, slotInfo: RVCModelSlot):
logger.info("[Voice Changer] [EasyVC] Creating instance ")
self.voiceChangerType = "RVC"
self.deviceManager = DeviceManager.get_instance()
EmbedderManager.initialize(params)
PitchExtractorManager.initialize(params)
self.settings = EasyVCSettings()
self.params = params
# self.pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline: Pipeline | None = None
self.audio_buffer: AudioInOut | None = None
self.pitchf_buffer: PitchfInOut | None = None
self.feature_buffer: FeatureInOut | None = None
self.prevVol = 0.0
self.slotInfo = slotInfo
# self.initialize()
def initialize(self):
logger.info("[Voice Changer][EasyVC] Initializing... ")
# Build the pipeline
try:
self.pipeline = createPipeline(self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector)
except PipelineCreateException as e: # NOQA
logger.error("[Voice Changer] pipeline create failed. check your model is valid.")
return
# Other settings
logger.info("[Voice Changer] [EasyVC] Initializing... done")
def setSamplingRate(self, inputSampleRate, outputSampleRate):
self.inputSampleRate = inputSampleRate
self.outputSampleRate = outputSampleRate
# self.initialize()
def update_settings(self, key: str, val: int | float | str):
logger.info(f"[Voice Changer][RVC]: update_settings {key}:{val}")
if key in self.settings.intData:
setattr(self.settings, key, int(val))
if key == "gpu":
self.deviceManager.setForceTensor(False)
self.initialize()
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline.setPitchExtractor(pitchExtractor)
else:
return False
return True
def get_info(self):
data = asdict(self.settings)
if self.pipeline is not None:
pipelineInfo = self.pipeline.getPipelineInfo()
data["pipelineInfo"] = pipelineInfo
else:
data["pipelineInfo"] = "None"
return data
def get_processing_sampling_rate(self):
return self.slotInfo.samplingRate
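# generate_input assembles the rolling conversion window: it appends the new
# 16 kHz samples to the internal buffers, pads the window up to a multiple of
# the 160-sample hop, and returns (audio, pitchf, feature, convertSize, vol, outSize).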
def generate_input(
self,
newData: AudioInOut,
crossfadeSize: int,
solaSearchFrame: int,
extra_frame: int,
):
# Audio comes in at 16 kHz.
inputSize = newData.shape[0]
newData = newData.astype(np.float32) / 32768.0
newFeatureLength = inputSize // 160 # hopsize:=160
if self.audio_buffer is not None:
# Concatenate onto the buffered past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
# if self.slotInfo.f0:
# self.pitchf_buffer = np.concatenate([self.pitchf_buffer, np.zeros(newFeatureLength)], 0)
self.feature_buffer = np.concatenate(
[
self.feature_buffer,
# np.zeros([newFeatureLength, self.slotInfo.embChannels]),
np.zeros([newFeatureLength, 768]),
],
0,
)
else:
self.audio_buffer = newData
# if self.slotInfo.f0:
# self.pitchf_buffer = np.zeros(newFeatureLength)
self.feature_buffer = np.zeros([newFeatureLength, 768])
convertSize = inputSize + crossfadeSize + solaSearchFrame + extra_frame
if convertSize % 160 != 0: # pad up so nothing is truncated at the model's output hop size (160)
convertSize = convertSize + (160 - (convertSize % 160))
outSize = int(((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate)
# If the buffer has not filled up yet, left-pad with zeros
if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
# if self.slotInfo.f0:
# self.pitchf_buffer = np.concatenate([np.zeros([convertSize // 160]), self.pitchf_buffer])
self.feature_buffer = np.concatenate(
[
np.zeros([convertSize // 160, 768]),
self.feature_buffer,
]
)
# Trim off the part that is no longer needed
convertOffset = -1 * convertSize
featureOffset = convertOffset // 160
self.audio_buffer = self.audio_buffer[convertOffset:] # keep only the portion to be converted
# if self.slotInfo.f0:
# self.pitchf_buffer = self.pitchf_buffer[featureOffset:]
self.feature_buffer = self.feature_buffer[featureOffset:]
# Crop out just the output portion and check its volume. (TODO: make the muting gradual)
cropOffset = -1 * (inputSize + crossfadeSize)
cropEnd = -1 * (crossfadeSize)
crop = self.audio_buffer[cropOffset:cropEnd]
vol = np.sqrt(np.square(crop).mean())
vol = max(vol, self.prevVol * 0.0)
self.prevVol = vol
return (
self.audio_buffer,
self.pitchf_buffer,
self.feature_buffer,
convertSize,
vol,
outSize,
)
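# inference(): resample the input to 16 kHz, build the conversion window via
# generate_input, run the pipeline, and resample the trimmed tail of the
# result back to the output sampling rate.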
def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
if self.pipeline is None:
logger.info("[Voice Changer] Pipeline is not initialized.")
raise PipelineNotInitializedException()
enableTimer = True
with Timer2("infer_easyvc", enableTimer) as t:
# Processing runs at 16 kHz (pitch, embed, (infer))
receivedData = cast(
AudioInOut,
resampy.resample(
receivedData,
self.inputSampleRate,
16000,
filter="kaiser_fast",
),
)
crossfade_frame = int((crossfade_frame / self.inputSampleRate) * 16000)
sola_search_frame = int((sola_search_frame / self.inputSampleRate) * 16000)
extra_frame = int((self.settings.extraConvertSize / self.inputSampleRate) * 16000)
# Build the input data
data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame)
t.record("generate_input")
audio = data[0]
pitchf = data[1]
feature = data[2]
convertSize = data[3]
vol = data[4]
outSize = data[5]
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16) * np.sqrt(vol)
device = self.pipeline.device
audio = torch.from_numpy(audio).to(device=device, dtype=torch.float32)
repeat = 0
sid = self.settings.dstId
f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio
protect = self.settings.protect
# if_f0 = 1 if self.slotInfo.f0 else 0
if_f0 = 0
# embOutputLayer = self.slotInfo.embOutputLayer
# useFinalProj = self.slotInfo.useFinalProj
t.record("pre-process")
try:
audio_out, self.pitchf_buffer, self.feature_buffer = self.pipeline.exec(
sid,
audio,
pitchf,
feature,
f0_up_key,
index_rate,
if_f0,
# 0,
self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0.0, # extra data size in seconds, computed at the input sampling rate
repeat,
outSize,
)
t.record("pipeline-exec")
# result = audio_out.detach().cpu().numpy() * np.sqrt(vol)
result = audio_out[-outSize:].detach().cpu().numpy() * np.sqrt(vol)
result = cast(
AudioInOut,
resampy.resample(
result,
16000,
self.outputSampleRate,
filter="kaiser_fast",
),
)
t.record("resample")
return result
except DeviceCannotSupportHalfPrecisionException as e: # NOQA
logger.warn("[Device Manager] Device cannot support half precision. Fallback to float....")
self.deviceManager.setForceTensor(True)
self.initialize()
# raise e
return
def __del__(self):
del self.pipeline
# print("---------- REMOVING ---------------")
# remove_path = os.path.join("RVC")
# sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
# for key in list(sys.modules):
# val = sys.modules.get(key)
# try:
# file_path = val.__file__
# if file_path.find("RVC" + os.path.sep) >= 0:
# # print("remove", key, file_path)
# sys.modules.pop(key)
# except Exception: # type:ignore
# # print(e)
# pass
def export2onnx(self):
modelSlot = self.slotInfo
if modelSlot.isONNX:
logger.warn("[Voice Changer] export2onnx, No pyTorch filepath.")
return {"status": "ng", "path": ""}
if self.pipeline is not None:
del self.pipeline
self.pipeline = None
torch.cuda.empty_cache()
self.initialize()
output_file_simple = export2onnx(self.settings.gpu, modelSlot)
return {
"status": "ok",
"path": f"/tmp/{output_file_simple}",
"filename": output_file_simple,
}
def get_model_current(self):
return [
{
"key": "defaultTune",
"val": self.settings.tran,
},
{
"key": "defaultIndexRatio",
"val": self.settings.indexRatio,
},
{
"key": "defaultProtect",
"val": self.settings.protect,
},
]

View File

@@ -0,0 +1,17 @@
import os
from data.ModelSlot import EasyVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
class EasyVCModelSlotGenerator(ModelSlotGenerator):
@classmethod
def loadModel(cls, props: LoadModelParams):
slotInfo: EasyVCModelSlot = EasyVCModelSlot()
for file in props.files:
if file.kind == "easyVCModel":
slotInfo.modelFile = file.name
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
slotInfo.slotIndex = props.slot
return slotInfo

View File

@@ -0,0 +1,33 @@
from dataclasses import dataclass, field
from const import PitchExtractorType
@dataclass
class EasyVCSettings:
gpu: int = -9999
dstId: int = 0
f0Detector: PitchExtractorType = "rmvpe_onnx" # e.g. dio, harvest, rmvpe_onnx
tran: int = 12
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 4
indexRatio: float = 0
protect: float = 0.5
rvcQuality: int = 0
silenceFront: int = 1 # 0:off, 1:on
modelSamplingRate: int = 48000
speakers: dict[str, int] = field(default_factory=lambda: {})
intData = [
"gpu",
"dstId",
"tran",
"extraConvertSize",
"rvcQuality",
"silenceFront",
]
floatData = ["silentThreshold", "indexRatio", "protect"]
strData = ["f0Detector"]
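# intData/floatData/strData list which fields update_settings() may assign
# dynamically, and how each incoming value is coerced.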

View File

@@ -0,0 +1,237 @@
from typing import Any
import math
import torch
import torch.nn.functional as F
from torch.cuda.amp import autocast
from Exceptions import (
DeviceCannotSupportHalfPrecisionException,
DeviceChangingException,
HalfPrecisionChangingException,
NotEnoughDataExtimateF0,
)
from mods.log_control import VoiceChangaerLogger
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
from voice_changer.utils.Timer import Timer2
logger = VoiceChangaerLogger.get_instance().getLogger()
class Pipeline(object):
embedder: Embedder
inferencer: Inferencer
pitchExtractor: PitchExtractor
index: Any | None
big_npy: Any | None
# feature: Any | None
targetSR: int
device: torch.device
isHalf: bool
def __init__(
self,
embedder: Embedder,
inferencer: Inferencer,
pitchExtractor: PitchExtractor,
targetSR,
device,
isHalf,
):
self.embedder = embedder
self.inferencer = inferencer
self.pitchExtractor = pitchExtractor
logger.info("GENERATE INFERENCER" + str(self.inferencer))
logger.info("GENERATE EMBEDDER" + str(self.embedder))
logger.info("GENERATE PITCH EXTRACTOR" + str(self.pitchExtractor))
self.targetSR = targetSR
self.device = device
self.isHalf = isHalf
self.sr = 16000
self.window = 160
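# 16 kHz audio with a 160-sample hop yields 100 feature frames per second.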
def getPipelineInfo(self):
inferencerInfo = self.inferencer.getInferencerInfo() if self.inferencer else {}
embedderInfo = self.embedder.getEmbedderInfo()
pitchExtractorInfo = self.pitchExtractor.getPitchExtractorInfo()
return {"inferencer": inferencerInfo, "embedder": embedderInfo, "pitchExtractor": pitchExtractorInfo, "isHalf": self.isHalf}
def setPitchExtractor(self, pitchExtractor: PitchExtractor):
self.pitchExtractor = pitchExtractor
def extractPitch(self, audio_pad, if_f0, pitchf, f0_up_key, silence_front):
try:
if if_f0 == 1:
pitch, pitchf = self.pitchExtractor.extract(
audio_pad,
pitchf,
f0_up_key,
self.sr,
self.window,
silence_front=silence_front,
)
# pitch = pitch[:p_len]
# pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
else:
pitch = None
pitchf = None
except IndexError as e: # NOQA
print(e)
import traceback
traceback.print_exc()
raise NotEnoughDataExtimateF0()
return pitch, pitchf
def extractFeatures(self, feats):
with autocast(enabled=self.isHalf):
try:
feats = self.embedder.extractFeatures(feats)
if torch.isnan(feats).all():
raise DeviceCannotSupportHalfPrecisionException()
return feats
except RuntimeError as e:
if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException()
elif "same device" in e.__str__():
raise DeviceChangingException()
else:
raise e
def infer(self, feats, p_len, pitch, pitchf, sid, out_size):
try:
with torch.no_grad():
with autocast(enabled=self.isHalf):
audio1 = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)
audio1 = (audio1 * 32767.5).data.to(dtype=torch.int16)
return audio1
except RuntimeError as e:
if "HALF" in e.__str__().upper():
print("HalfPresicion Error:", e)
raise HalfPrecisionChangingException()
else:
raise e
def exec(
self,
sid,
audio, # torch.tensor [n]
pitchf, # np.array [m]
feature, # np.array [m, feat]
f0_up_key,
index_rate,
if_f0,
silence_front,
repeat,
out_size=None,
):
# print(f"pipeline exec input, audio:{audio.shape}, pitchf:{pitchf.shape}, feature:{feature.shape}")
# print(f"pipeline exec input, silence_front:{silence_front}, out_size:{out_size}")
enablePipelineTimer = True
with Timer2("Pipeline-Exec", enablePipelineTimer) as t: # NOQA
# Audio arrives at a 16000 Hz sampling rate; everything from here on is processed at 16000 Hz.
# self.t_pad = self.sr * repeat # 1 second
# self.t_pad_tgt = self.targetSR * repeat # 1 second; trimmed on output (emitted at the model's sampling rate)
audio = audio.unsqueeze(0)
quality_padding_sec = (repeat * (audio.shape[1] - 1)) / self.sr  # the reflect padding must be smaller than the input size
self.t_pad = round(self.sr * quality_padding_sec)  # audio added before and after the chunk
self.t_pad_tgt = round(self.targetSR * quality_padding_sec)  # same padding in output samples; trimmed after inference (output is at the model's sampling rate)
audio_pad = F.pad(audio, (self.t_pad, self.t_pad), mode="reflect").squeeze(0)
p_len = audio_pad.shape[0] // self.window
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
# # Turn silence_front off when RVC Quality is on.
# silence_front = silence_front if repeat == 0 else 0
# pitchf = pitchf if repeat == 0 else np.zeros(p_len)
# out_size = out_size if repeat == 0 else None
# adjust tensor shape/dtype
feats = audio_pad
if feats.dim() == 2: # double channels
feats = feats.mean(-1)
assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1)
t.record("pre-process")
# pitch detection
pitch, pitchf = self.extractPitch(audio_pad, if_f0, pitchf, f0_up_key, silence_front)
t.record("extract-pitch")
# embedding
feats = self.extractFeatures(feats)
t.record("extract-feats")
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
# if protect < 0.5 and search_index:
# feats0 = feats.clone()
# adjust pitch length
p_len = audio_pad.shape[0] // self.window
if feats.shape[1] < p_len:
p_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, :p_len]
pitchf = pitchf[:, :p_len]
feats_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, -feats_len:]
pitchf = pitchf[:, -feats_len:]
p_len = torch.tensor([feats_len], device=self.device).long()
# apply silent front for inference
if type(self.inferencer) in [OnnxRVCInferencer, OnnxRVCInferencerNono]:
npyOffset = math.floor(silence_front * 16000) // 360
feats = feats[:, npyOffset * 2 :, :] # NOQA
feats_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, -feats_len:]
pitchf = pitchf[:, -feats_len:]
p_len = torch.tensor([feats_len], device=self.device).long()
t.record("mid-precess")
# run inference
audio1 = self.infer(feats, p_len, pitch, pitchf, sid, out_size)
t.record("infer")
feats_buffer = feats.squeeze(0).detach().cpu()
if pitchf is not None:
pitchf_buffer = pitchf.squeeze(0).detach().cpu()
else:
pitchf_buffer = None
del p_len, pitch, pitchf, feats
# torch.cuda.empty_cache()
# infer() emits audio at the model's sampling rate.
# Input to the pipeline is 16 kHz, as hubert expects.
if self.t_pad_tgt != 0:
offset = self.t_pad_tgt
end = -1 * self.t_pad_tgt
audio1 = audio1[offset:end]
del sid
t.record("post-process")
# torch.cuda.empty_cache()
# print("EXEC AVERAGE:", t.avrSecs)
return audio1, pitchf_buffer, feats_buffer
def __del__(self):
del self.embedder
del self.inferencer
del self.pitchExtractor
print("Pipeline has been deleted")

View File

@ -0,0 +1,52 @@
import os
import traceback
from Exceptions import PipelineCreateException
from const import EnumInferenceTypes, PitchExtractorType
from data.ModelSlot import EasyVCModelSlot
from voice_changer.EasyVC.pipeline.Pipeline import Pipeline
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
def createPipeline(params: VoiceChangerParams, modelSlot: EasyVCModelSlot, gpu: int, f0Detector: PitchExtractorType):
dev = DeviceManager.get_instance().getDevice(gpu)
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
# create the Inferencer
try:
modelPath = os.path.join(params.model_dir, str(modelSlot.slotIndex), os.path.basename(modelSlot.modelFile))
inferencer = InferencerManager.getInferencer(EnumInferenceTypes.easyVC, modelPath, gpu, modelSlot.version)
except Exception as e:
print("[Voice Changer] exception! loading inferencer", e)
traceback.print_exc()
raise PipelineCreateException("[Voice Changer] exception! loading inferencer")
# create the Embedder
try:
embedder = EmbedderManager.getEmbedder(
"whisper",
half,
dev,
)
except Exception as e:
print("[Voice Changer] exception! loading embedder", e, dev)
traceback.print_exc()
raise PipelineCreateException("[Voice Changer] exception! loading embedder")
# pitchExtractor
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector, gpu)
pipeline = Pipeline(
embedder,
inferencer,
pitchExtractor,
modelSlot.samplingRate,
dev,
half,
)
return pipeline
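A hedged wiring sketch; params and modelSlot are assumed to be already populated by the server's model slot manager.
# Hypothetical: build the pipeline and inspect its components.
pipeline = createPipeline(params, modelSlot, gpu=0, f0Detector="rmvpe_onnx")
print(pipeline.getPipelineInfo())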

View File

@ -1,6 +1,6 @@
import os
from data.ModelSlot import BeatriceModelSlot, LLVCModelSlot
from data.ModelSlot import LLVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator

View File

@ -1,6 +1,7 @@
"""
For VoiceChangerV2
"""
from dataclasses import asdict
import numpy as np
import torch
@ -59,13 +60,9 @@ class RVCr2(VoiceChangerModel):
# create the pipeline
try:
self.pipeline = createPipeline(
self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector
)
self.pipeline = createPipeline(self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector)
except PipelineCreateException as e: # NOQA
logger.error(
"[Voice Changer] pipeline create failed. check your model is valid."
)
logger.error("[Voice Changer] pipeline create failed. check your model is valid.")
return
# other settings
@ -91,9 +88,7 @@ class RVCr2(VoiceChangerModel):
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector, self.settings.gpu
)
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline.setPitchExtractor(pitchExtractor)
else:
return False
@ -127,9 +122,7 @@ class RVCr2(VoiceChangerModel):
# concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
if self.slotInfo.f0:
self.pitchf_buffer = np.concatenate(
[self.pitchf_buffer, np.zeros(newFeatureLength)], 0
)
self.pitchf_buffer = np.concatenate([self.pitchf_buffer, np.zeros(newFeatureLength)], 0)
self.feature_buffer = np.concatenate(
[
self.feature_buffer,
@ -141,27 +134,19 @@ class RVCr2(VoiceChangerModel):
self.audio_buffer = newData
if self.slotInfo.f0:
self.pitchf_buffer = np.zeros(newFeatureLength)
self.feature_buffer = np.zeros(
[newFeatureLength, self.slotInfo.embChannels]
)
self.feature_buffer = np.zeros([newFeatureLength, self.slotInfo.embChannels])
convertSize = inputSize + crossfadeSize + solaSearchFrame + extra_frame
if convertSize % 160 != 0:  # pad up so the model's output hop size does not truncate the buffer
convertSize = convertSize + (160 - (convertSize % 160))
outSize = int(
((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate
)
outSize = int(((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate)
# pad with zeros while the buffer has not filled yet
if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate(
[np.zeros([convertSize]), self.audio_buffer]
)
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
if self.slotInfo.f0:
self.pitchf_buffer = np.concatenate(
[np.zeros([convertSize // 160]), self.pitchf_buffer]
)
self.pitchf_buffer = np.concatenate([np.zeros([convertSize // 160]), self.pitchf_buffer])
self.feature_buffer = np.concatenate(
[
np.zeros([convertSize // 160, self.slotInfo.embChannels]),
@ -194,9 +179,7 @@ class RVCr2(VoiceChangerModel):
outSize,
)
def inference(
self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int
):
def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
if self.pipeline is None:
logger.info("[Voice Changer] Pipeline is not initialized.")
raise PipelineNotInitializedException()
@ -208,18 +191,15 @@ class RVCr2(VoiceChangerModel):
receivedData,
self.inputSampleRate,
16000,
filter="kaiser_fast",
),
)
crossfade_frame = int((crossfade_frame / self.inputSampleRate) * 16000)
sola_search_frame = int((sola_search_frame / self.inputSampleRate) * 16000)
extra_frame = int(
(self.settings.extraConvertSize / self.inputSampleRate) * 16000
)
extra_frame = int((self.settings.extraConvertSize / self.inputSampleRate) * 16000)
# generate input data
data = self.generate_input(
receivedData, crossfade_frame, sola_search_frame, extra_frame
)
data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame)
audio = data[0]
pitchf = data[1]
@ -254,9 +234,7 @@ class RVCr2(VoiceChangerModel):
index_rate,
if_f0,
# 0,
self.settings.extraConvertSize / self.inputSampleRate
if self.settings.silenceFront
else 0.0,  # extraConvertSize in seconds, computed at the input sampling rate
self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0.0,  # extraConvertSize in seconds, computed at the input sampling rate
embOutputLayer,
useFinalProj,
repeat,
@ -272,14 +250,13 @@ class RVCr2(VoiceChangerModel):
result,
self.slotInfo.samplingRate,
self.outputSampleRate,
filter="kaiser_fast",
),
)
return result
except DeviceCannotSupportHalfPrecisionException as e: # NOQA
logger.warn(
"[Device Manager] Device cannot support half precision. Fallback to float...."
)
logger.warn("[Device Manager] Device cannot support half precision. Fallback to float....")
self.deviceManager.setForceTensor(True)
self.initialize()
# raise e

View File

@ -15,14 +15,6 @@ class Embedder(EmbedderProtocol):
self.model: Any | None = None
def loadModel(self, file: str, dev: device, isHalf: bool = True):
...
def extractFeatures(
self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
) -> torch.Tensor:
...
def getEmbedderInfo(self):
return {
"embedderType": self.embedderType,

View File

@ -6,6 +6,7 @@ from voice_changer.RVC.embedder.FairseqContentvec import FairseqContentvec
from voice_changer.RVC.embedder.FairseqHubert import FairseqHubert
from voice_changer.RVC.embedder.FairseqHubertJp import FairseqHubertJp
from voice_changer.RVC.embedder.OnnxContentvec import OnnxContentvec
from voice_changer.RVC.embedder.Whisper import Whisper
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
@ -18,9 +19,7 @@ class EmbedderManager:
cls.params = params
@classmethod
def getEmbedder(
cls, embederType: EmbedderType, isHalf: bool, dev: device
) -> Embedder:
def getEmbedder(cls, embederType: EmbedderType, isHalf: bool, dev: device) -> Embedder:
if cls.currentEmbedder is None:
print("[Voice Changer] generate new embedder. (no embedder)")
cls.currentEmbedder = cls.loadEmbedder(embederType, isHalf, dev)
@ -36,9 +35,7 @@ class EmbedderManager:
return cls.currentEmbedder
@classmethod
def loadEmbedder(
cls, embederType: EmbedderType, isHalf: bool, dev: device
) -> Embedder:
def loadEmbedder(cls, embederType: EmbedderType, isHalf: bool, dev: device) -> Embedder:
if embederType == "hubert_base":
try:
if cls.params.content_vec_500_onnx_on is False:
@ -62,5 +59,8 @@ class EmbedderManager:
print(e)
file = cls.params.hubert_base
return FairseqContentvec().loadModel(file, dev, isHalf)
elif embederType == "whisper":
file = cls.params.whisper_tiny
return Whisper().loadModel(file, dev, isHalf)
else:
return FairseqHubert().loadModel(file, dev, isHalf)

View File

@ -0,0 +1,53 @@
import torch
from torch import device
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.embedder.whisper.audio import log_mel_spectrogram
from .whisper.whisper import load_model
import numpy as np
import torch.nn.functional as F
class Whisper(Embedder):
def loadModel(self, file: str, dev: device, isHalf: bool = True) -> Embedder:
super().setProps("whisper", file, dev, isHalf)
whisper = load_model(file).to(dev)
self.model = whisper
return self
def extractFeatures(self, audio: torch.Tensor) -> torch.Tensor:
try:
if isinstance(audio, np.ndarray):
audio = torch.from_numpy(audio.astype(np.float32))
audio = audio.to(self.dev)
# if self.isHalf and audio.dtype != torch.float16:
# audio = audio.half()
if self.isHalf is False and audio.dtype != torch.float32:
audio = audio.float()
if audio.dim() != 1:
audio = audio.squeeze(0)
if audio.dim() != 1:
raise RuntimeError(f"Exeption in {self.__class__.__name__} audio.dim is not 1 (size :{audio.dim()}, {audio.shape})")
audln = audio.shape[0]
ppgln = audln // 320
mel = log_mel_spectrogram(audio).to(self.model.device)
# print(f"[whisper_ppg] audio:{audio.shape}({audio.shape[0]/16000}ms) -> ppg:{ppgln}")
# print(f"[whisper_ppg] mel:{mel.shape}({mel.dtype})")
with torch.no_grad():
ppg = self.model.encoder(mel.unsqueeze(0))
padding = (0, 384)
ppg_padded = F.pad(ppg, padding, "constant", 0)
ppg_padded = ppg_padded.data
# print(f"[whisper_ppg] ppg:{ppg.shape}")
except Exception as e:
print(e)
raise RuntimeError(f"Exeption in {self.__class__.__name__}", e)
# raise EmbedderProcessException(f"Exeption in {self.__class__.__name__}", e)
return ppg_padded
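A usage sketch, assuming "tiny.pt" is a local Whisper tiny checkpoint; its 384-dim encoder output is zero-padded to 768 channels above, presumably to match the downstream feature width.
# Hypothetical usage on CPU.
emb = Whisper().loadModel("tiny.pt", torch.device("cpu"), isHalf=False)
audio = torch.zeros(16000)         # 1 s of silence at 16 kHz
ppg = emb.extractFeatures(audio)   # -> (1, 50, 768): 50 frames after the stride-2 conv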

View File

@ -0,0 +1,120 @@
import os
from functools import lru_cache
from typing import Union
import numpy as np
import torch
import torch.nn.functional as F
from voice_changer.RVC.embedder.whisper.utils import exact_div
# hard-coded audio hyperparameters
SAMPLE_RATE = 16000
N_FFT = 400
N_MELS = 80
HOP_LENGTH = 160
CHUNK_LENGTH = 30
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000: number of samples in a chunk
N_FRAMES = exact_div(N_SAMPLES, HOP_LENGTH) # 3000: number of frames in a mel spectrogram input
# def load_audio(file: str, sr: int = SAMPLE_RATE):
# """
# Open an audio file and read as mono waveform, resampling as necessary
# Parameters
# ----------
# file: str
# The audio file to open
# sr: int
# The sample rate to resample the audio if necessary
# Returns
# -------
# A NumPy array containing the audio waveform, in float32 dtype.
# """
# try:
# # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
# # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
# out, _ = ffmpeg.input(file, threads=0).output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr).run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
# except ffmpeg.Error as e:
# raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
# return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
"""
Pad or trim the audio array to N_SAMPLES, as expected by the encoder.
"""
if torch.is_tensor(array):
if array.shape[axis] > length:
array = array.index_select(dim=axis, index=torch.arange(length, device=array.device))
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = F.pad(array, [pad for sizes in pad_widths[::-1] for pad in sizes])
else:
if array.shape[axis] > length:
array = array.take(indices=range(length), axis=axis)
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = np.pad(array, pad_widths)
return array
@lru_cache(maxsize=None)
def mel_filters(device, n_mels: int = N_MELS) -> torch.Tensor:
"""
Load the mel filterbank matrix for projecting an STFT onto a Mel spectrogram.
This decouples the librosa dependency; the matrix was saved using:
np.savez_compressed(
"mel_filters.npz",
mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
)
"""
assert n_mels == 80, f"Unsupported n_mels: {n_mels}"
with np.load(os.path.join(os.path.dirname(__file__), "assets", "mel_filters.npz")) as f:
return torch.from_numpy(f[f"mel_{n_mels}"]).to(device)
def log_mel_spectrogram(audio: Union[str, np.ndarray, torch.Tensor], n_mels: int = N_MELS):
"""
Compute the log-Mel spectrogram of an audio waveform.
Parameters
----------
audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
The path to an audio file, or a NumPy array or Tensor containing the audio waveform at 16 kHz
n_mels: int
The number of Mel-frequency filters, only 80 is supported
Returns
-------
torch.Tensor, shape = (80, n_frames)
A Tensor that contains the Mel spectrogram
"""
if not torch.is_tensor(audio):
if isinstance(audio, str):
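# NOTE: load_audio is commented out above, so a string path raises NameError in this build.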
audio = load_audio(audio)
audio = torch.from_numpy(audio)
window = torch.hann_window(N_FFT).to(audio.device) # type: ignore
stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True) # type: ignore
magnitudes = stft[..., :-1].abs() ** 2
filters = mel_filters(audio.device, n_mels) # type: ignore
mel_spec = filters @ magnitudes
log_spec = torch.clamp(mel_spec, min=1e-10).log10()
log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
log_spec = (log_spec + 4.0) / 4.0
return log_spec
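A quick shape check, assuming the bundled assets/mel_filters.npz is present: one second of 16 kHz audio yields 100 frames (hop length 160, last STFT frame dropped).
mel = log_mel_spectrogram(torch.zeros(16000))
assert mel.shape == (80, 100)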

View File

@ -0,0 +1,222 @@
from dataclasses import dataclass
from typing import Dict
from typing import Iterable, Optional
import numpy as np
import torch
import torch.nn.functional as F
from torch import Tensor
from torch import nn
# from .decoding import detect_language as detect_language_function, decode as decode_function
@dataclass
class ModelDimensions:
n_mels: int
n_audio_ctx: int
n_audio_state: int
n_audio_head: int
n_audio_layer: int
n_vocab: int
n_text_ctx: int
n_text_state: int
n_text_head: int
n_text_layer: int
class LayerNorm(nn.LayerNorm):
def forward(self, x: Tensor) -> Tensor:
return super().forward(x.float()).type(x.dtype)
class Linear(nn.Linear):
def forward(self, x: Tensor) -> Tensor:
return F.linear(x, self.weight.to(x.dtype), None if self.bias is None else self.bias.to(x.dtype))
class Conv1d(nn.Conv1d):
def _conv_forward(self, x: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor:
return super()._conv_forward(x, weight.to(x.dtype), None if bias is None else bias.to(x.dtype))
def sinusoids(length, channels, max_timescale=10000):
"""Returns sinusoids for positional embedding"""
assert channels % 2 == 0
log_timescale_increment = np.log(max_timescale) / (channels // 2 - 1)
inv_timescales = torch.exp(-log_timescale_increment * torch.arange(channels // 2))
scaled_time = torch.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
return torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)
class MultiHeadAttention(nn.Module):
def __init__(self, n_state: int, n_head: int):
super().__init__()
self.n_head = n_head
self.query = Linear(n_state, n_state)
self.key = Linear(n_state, n_state, bias=False)
self.value = Linear(n_state, n_state)
self.out = Linear(n_state, n_state)
def forward(
self,
x: Tensor,
xa: Optional[Tensor] = None,
mask: Optional[Tensor] = None,
kv_cache: Optional[dict] = None,
):
q = self.query(x)
if kv_cache is None or xa is None or self.key not in kv_cache:
# hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
# otherwise, perform key/value projections for self- or cross-attention as usual.
k = self.key(x if xa is None else xa)
v = self.value(x if xa is None else xa)
else:
# for cross-attention, calculate keys and values once and reuse in subsequent calls.
k = kv_cache[self.key]
v = kv_cache[self.value]
wv, qk = self.qkv_attention(q, k, v, mask)
return self.out(wv), qk
def qkv_attention(self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None):
n_batch, n_ctx, n_state = q.shape
scale = (n_state // self.n_head) ** -0.25
q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3) * scale
k = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 3, 1) * scale
v = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
qk = q @ k
if mask is not None:
qk = qk + mask[:n_ctx, :n_ctx]
qk = qk.float()
w = F.softmax(qk, dim=-1).to(q.dtype)
return (w @ v).permute(0, 2, 1, 3).flatten(start_dim=2), qk.detach()
class ResidualAttentionBlock(nn.Module):
def __init__(self, n_state: int, n_head: int, cross_attention: bool = False):
super().__init__()
self.attn = MultiHeadAttention(n_state, n_head)
self.attn_ln = LayerNorm(n_state)
self.cross_attn = MultiHeadAttention(n_state, n_head) if cross_attention else None
self.cross_attn_ln = LayerNorm(n_state) if cross_attention else None
n_mlp = n_state * 4
self.mlp = nn.Sequential(Linear(n_state, n_mlp), nn.GELU(), Linear(n_mlp, n_state))
self.mlp_ln = LayerNorm(n_state)
def forward(
self,
x: Tensor,
xa: Optional[Tensor] = None,
mask: Optional[Tensor] = None,
kv_cache: Optional[dict] = None,
):
x = x + self.attn(self.attn_ln(x), mask=mask, kv_cache=kv_cache)[0]
if self.cross_attn and self.cross_attn_ln:
x = x + self.cross_attn(self.cross_attn_ln(x), xa, kv_cache=kv_cache)[0]
x = x + self.mlp(self.mlp_ln(x))
return x
class AudioEncoder(nn.Module):
def __init__(self, n_mels: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
super().__init__()
self.conv1 = Conv1d(n_mels, n_state, kernel_size=3, padding=1)
self.conv2 = Conv1d(n_state, n_state, kernel_size=3, stride=2, padding=1)
self.register_buffer("positional_embedding", sinusoids(n_ctx, n_state))
self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList([ResidualAttentionBlock(n_state, n_head) for _ in range(n_layer)])
self.ln_post = LayerNorm(n_state)
def forward(self, x: Tensor):
"""
x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
the mel spectrogram of the audio
"""
x = F.gelu(self.conv1(x))
x = F.gelu(self.conv2(x))
x = x.permute(0, 2, 1)
x = (x[:, :, :] + self.positional_embedding[: x.shape[1], :]).to(x.dtype)
for j, block in enumerate(self.blocks):
x = block(x)
x = self.ln_post(x)
return x
class TextDecoder(nn.Module):
def __init__(self, n_vocab: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
super().__init__()
self.token_embedding = nn.Embedding(n_vocab, n_state)
self.positional_embedding = nn.Parameter(torch.empty(n_ctx, n_state))
self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList([ResidualAttentionBlock(n_state, n_head, cross_attention=True) for _ in range(n_layer)])
self.ln = LayerNorm(n_state)
mask = torch.empty(n_ctx, n_ctx).fill_(-np.inf).triu_(1)
self.register_buffer("mask", mask, persistent=False)
def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None):
"""
x : torch.LongTensor, shape = (batch_size, <= n_ctx)
the text tokens
xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state)
the encoded audio features to be attended on
"""
offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0
x = self.token_embedding(x) + self.positional_embedding[offset : offset + x.shape[-1]]
x = x.to(xa.dtype)
for block in self.blocks:
x = block(x, xa, mask=self.mask, kv_cache=kv_cache)
x = self.ln(x)
logits = (x @ torch.transpose(self.token_embedding.weight.to(x.dtype), 0, 1)).float()
return logits
class Whisper(nn.Module):
def __init__(self, dims: ModelDimensions):
super().__init__()
self.dims = dims
self.encoder = AudioEncoder(
self.dims.n_mels,
self.dims.n_audio_ctx,
self.dims.n_audio_state,
self.dims.n_audio_head,
self.dims.n_audio_layer,
)
self.decoder = TextDecoder(
self.dims.n_vocab,
self.dims.n_text_ctx,
self.dims.n_text_state,
self.dims.n_text_head,
self.dims.n_text_layer,
)
def embed_audio(self, mel: torch.Tensor):
return self.encoder(mel)
def logits(self, tokens: torch.Tensor, audio_features: torch.Tensor):
return self.decoder(tokens, audio_features)
def forward(self, mel: torch.Tensor, tokens: torch.Tensor) -> Dict[str, torch.Tensor]:
return self.decoder(tokens, self.encoder(mel))
@property
def device(self):
return next(self.parameters()).device
@property
def is_multilingual(self):
return self.dims.n_vocab == 51865
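For reference, a construction sketch with dimensions matching the public multilingual tiny checkpoint (values assumed, not taken from this diff):
# Assumed "tiny" dimensions; a 30 s mel input maps to 1500 encoder frames.
dims = ModelDimensions(n_mels=80, n_audio_ctx=1500, n_audio_state=384, n_audio_head=6, n_audio_layer=4, n_vocab=51865, n_text_ctx=448, n_text_state=384, n_text_head=6, n_text_layer=4)
model = Whisper(dims)
feats = model.embed_audio(torch.zeros(1, 80, 3000))  # -> (1, 1500, 384)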

View File

@ -0,0 +1,22 @@
import sys
system_encoding = sys.getdefaultencoding()
if system_encoding != "utf-8":
def make_safe(string):
# replaces any character not representable using the system default encoding with an '?',
# avoiding UnicodeEncodeError (https://github.com/openai/whisper/discussions/729).
return string.encode(system_encoding, errors="replace").decode(system_encoding)
else:
def make_safe(string):
# utf-8 can encode any Unicode code point, so no need to do the round-trip encoding
return string
def exact_div(x, y):
assert x % y == 0
return x // y

View File

@ -0,0 +1,208 @@
# from whisper_ppg.model import Whisper, ModelDimensions
# from whisper_ppg_custom._LightWhisper import LightWhisper
# from whisper_ppg_custom.Timer import Timer2
# from whisper_ppg_custom.whisper_ppg.audio import load_audio, pad_or_trim, log_mel_spectrogram
# from whisper_ppg_custom.whisper_ppg.model import Whisper, ModelDimensions
import torch
# import numpy as np
# from easy_vc_dev.utils.whisper.audio import load_audio, pad_or_trim
from .model import ModelDimensions, Whisper
# import onnx
# from onnxsim import simplify
# import json
# import onnxruntime
def load_model(path) -> Whisper:
device = "cpu"
checkpoint = torch.load(path, map_location=device)
dims = ModelDimensions(**checkpoint["dims"])
model = Whisper(dims)
model.load_state_dict(checkpoint["model_state_dict"])
model = model.to(device)
return model
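A minimal usage sketch, assuming a local tiny checkpoint:
model = load_model("tiny.pt")     # hypothetical local path
print(model.dims.n_audio_state)   # 384 for tiny
print(model.is_multilingual)      # True for the 51865-token vocabulary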
# def pred_ppg(whisper: Whisper, wavPath: str, ppgPath: str):
# print("pred")
# # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
# audio = load_audio(wavPath)
# audln = audio.shape[0]
# ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# audio = audio[:400000]
# print("audio.shape2", audio.shape)
# print(f"whisper.device {whisper.device}")
# for i in range(5):
# with Timer2("mainPorcess timer", True) as t:
# mel = log_mel_spectrogram(audio).to(whisper.device)
# with torch.no_grad():
# ppg = whisper.encoder(mel.unsqueeze(0)).squeeze().data.cpu().float().numpy()
# print("ppg.shape", ppg.shape)
# ppg = ppg[:ppgln,]
# print(ppg.shape)
# np.save(ppgPath, ppg, allow_pickle=False)
# t.record("fin")
# print("res", ppg)
# def pred_ppg_onnx(wavPath, ppgPath):
# print("pred")
# # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
# whisper = load_model("tiny.pt")
# audio = load_audio(wavPath)
# # audln = audio.shape[0]
# # ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# audio = audio[:1000]
# print("audio.shape2", audio.shape)
# print(f"whisper.device {whisper.device}")
# onnx_session = onnxruntime.InferenceSession(
# "wencoder_sim.onnx",
# providers=["CPUExecutionProvider"],
# provider_options=[
# {
# "intra_op_num_threads": 8,
# "execution_mode": onnxruntime.ExecutionMode.ORT_PARALLEL,
# "inter_op_num_threads": 8,
# }
# ],
# )
# for i in range(5):
# with Timer2("mainPorcess timer", True) as t:
# mel = log_mel_spectrogram(audio).to(whisper.device).unsqueeze(0)
# onnx_res = onnx_session.run(
# ["ppg"],
# {
# "mel": mel.cpu().numpy(),
# },
# )
# t.record("fin")
# print("onnx_res", onnx_res)
# def export_encoder(wavPath, ppgPath):
# print("pred")
# # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
# whisper = load_model("tiny.pt")
# audio = load_audio(wavPath)
# # audln = audio.shape[0]
# # ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# print("audio.shape2", audio.shape)
# print(f"whisper.device {whisper.device}")
# mel = log_mel_spectrogram(audio).to(whisper.device).unsqueeze(0)
# input_names = ["mel"]
# output_names = ["ppg"]
# torch.onnx.export(
# whisper.encoder,
# (mel,),
# "wencoder.onnx",
# dynamic_axes={
# "mel": [2],
# },
# do_constant_folding=False,
# opset_version=17,
# verbose=False,
# input_names=input_names,
# output_names=output_names,
# )
# metadata = {
# "application": "VC_CLIENT",
# "version": "2.1",
# }
# model_onnx2 = onnx.load("wencoder.onnx")
# model_simp, check = simplify(model_onnx2)
# meta = model_simp.metadata_props.add()
# meta.key = "metadata"
# meta.value = json.dumps(metadata)
# onnx.save(model_simp, "wencoder_sim.onnx")
# def pred_ppg_onnx_w(wavPath, ppgPath):
# print("pred")
# audio = load_audio(wavPath)
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# print("audio.shape2", audio.shape)
# onnx_session = onnxruntime.InferenceSession(
# "wencoder_sim.onnx",
# providers=["CPUExecutionProvider"],
# provider_options=[
# {
# "intra_op_num_threads": 8,
# "execution_mode": onnxruntime.ExecutionMode.ORT_PARALLEL,
# "inter_op_num_threads": 8,
# }
# ],
# )
# for i in range(5):
# with Timer2("mainPorcess timer", True) as t:
# mel = log_mel_spectrogram(audio).to("cpu").unsqueeze(0)
# # mel = mel[:, :, 1500:]
# mel = mel[:, :, 2500:]
# # mel[0, 79, 1499] = 0.1
# print("x.shape", mel.shape)
# onnx_res = onnx_session.run(
# ["ppg"],
# {
# "mel": mel.cpu().numpy(),
# },
# )
# t.record("fin")
# print("onnx_res", onnx_res)
# def export_wrapped_encoder(wavPath, ppgPath):
# print("pred")
# whisper = LightWhisper("tiny.pt")
# audio = load_audio(wavPath)
# # audln = audio.shape[0]
# # ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# print("audio.shape2", audio.shape)
# mel = log_mel_spectrogram(audio).to("cpu").unsqueeze(0)
# mel = mel[:, :, 1500:]
# input_names = ["mel"]
# output_names = ["ppg"]
# torch.onnx.export(
# whisper,
# (mel,),
# "wencoder.onnx",
# dynamic_axes={
# "mel": [2],
# },
# do_constant_folding=True,
# opset_version=17,
# verbose=False,
# input_names=input_names,
# output_names=output_names,
# )
# metadata = {
# "application": "VC_CLIENT",
# "version": "2.1",
# }
# model_onnx2 = onnx.load("wencoder.onnx")
# model_simp, check = simplify(model_onnx2)
# meta = model_simp.metadata_props.add()
# meta.key = "metadata"
# meta.value = json.dumps(metadata)
# onnx.save(model_simp, "wencoder_sim.onnx")

View File

@ -0,0 +1,46 @@
import torch
import numpy as np
from const import EnumInferenceTypes
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
class EasyVCInferencerONNX(OnnxRVCInferencer):
def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
super().loadModel(file, gpu, inferencerTypeVersion)
self.setProps(EnumInferenceTypes.easyVC, file, self.isHalf, gpu)
return self
def infer(
self,
feats: torch.Tensor,
pitch_length: torch.Tensor,
pitch: torch.Tensor | None,
pitchf: torch.Tensor | None,
sid: torch.Tensor,
convert_length: int | None,
) -> torch.Tensor:
if self.isHalf:
audio1 = self.model.run(
["audio"],
{
"feats": feats.cpu().numpy().astype(np.float16),
"p_len": pitch_length.cpu().numpy().astype(np.int64),
},
)
else:
audio1 = self.model.run(
["audio"],
{
"feats": feats.cpu().numpy().astype(np.float32),
"p_len": pitch_length.cpu().numpy().astype(np.int64),
},
)
res = audio1[0][0][0]
# if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
# res = audio1[0]
# else:
# res = np.array(audio1)[0][0, 0]
# res = np.clip(res, -1.0, 1.0)
return torch.tensor(res)
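A usage sketch; the ONNX model is assumed to expose "feats"/"p_len" inputs and an "audio" output as infer() requires. Note that pitch, pitchf and sid are accepted only for interface compatibility and are not fed to the session.
# Hypothetical usage; the 768-channel feature width is an assumption.
inferencer = EasyVCInferencerONNX().loadModel("easy_vc.onnx", gpu=-1)
feats = torch.zeros(1, 100, 768)
p_len = torch.tensor([100], dtype=torch.int64)
audio = inferencer.infer(feats, p_len, None, None, torch.tensor([0]), None)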

View File

@ -1,4 +1,5 @@
from const import EnumInferenceTypes
from voice_changer.RVC.inferencer.EasyVCInferencerONNX import EasyVCInferencerONNX
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
@ -42,6 +43,7 @@ class InferencerManager:
elif inferencerType == EnumInferenceTypes.pyTorchVoRASbeta or inferencerType == EnumInferenceTypes.pyTorchVoRASbeta.value:
if sys.platform.startswith("darwin") is False:
from voice_changer.RVC.inferencer.VorasInferencebeta import VoRASInferencer
return VoRASInferencer().loadModel(file, gpu)
else:
raise RuntimeError("[Voice Changer] VoRAS is not supported on macOS")
@ -55,5 +57,8 @@ class InferencerManager:
return OnnxRVCInferencer().loadModel(file, gpu, inferencerTypeVersion)
elif inferencerType == EnumInferenceTypes.onnxRVCNono or inferencerType == EnumInferenceTypes.onnxRVCNono.value:
return OnnxRVCInferencerNono().loadModel(file, gpu, inferencerTypeVersion)
elif inferencerType == EnumInferenceTypes.easyVC or inferencerType == EnumInferenceTypes.easyVC.value:
return EasyVCInferencerONNX().loadModel(file, gpu)
else:
raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)

View File

@ -206,6 +206,12 @@ class VoiceChangerManager(ServerDeviceCallbacks):
slotInfo = LLVCModelSlotGenerator.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "EasyVC":
from voice_changer.EasyVC.EasyVCModelSlotGenerator import EasyVCModelSlotGenerator
slotInfo = EasyVCModelSlotGenerator.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
logger.info(f"params, {params}")
def get_info(self):
@ -307,6 +313,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
self.voiceChanger.setModel(self.voiceChangerModel)
pass
elif slotInfo.voiceChangerType == "EasyVC":
logger.info("................EasyVC")
from voice_changer.EasyVC.EasyVC import EasyVC
self.voiceChangerModel = EasyVC(self.params, slotInfo)
self.voiceChanger = VoiceChangerV2(self.params)
self.voiceChanger.setModel(self.voiceChangerModel)
pass
else:
logger.info(f"[Voice Changer] unknown voice changer model: {slotInfo.voiceChangerType}")
if hasattr(self, "voiceChangerModel"):

View File

@ -216,8 +216,8 @@ class VoiceChangerV2(VoiceChangerIF):
try:
if self.voiceChanger is None:
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
with Timer2("main-process", False) as t:
enableMainprocessTimer = False
with Timer2("main-process", enableMainprocessTimer) as t:
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
if self.noCrossFade: # Beatrice, LLVC
@ -234,12 +234,14 @@ class VoiceChangerV2(VoiceChangerIF):
block_frame = receivedData.shape[0]
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
self._generate_strength(crossfade_frame)
t.record("generate_strength")
audio = self.voiceChanger.inference(
receivedData,
crossfade_frame=crossfade_frame,
sola_search_frame=sola_search_frame,
)
t.record("inference")
if hasattr(self, "sola_buffer") is True:
np.set_printoptions(threshold=10000)
@ -271,6 +273,8 @@ class VoiceChangerV2(VoiceChangerIF):
logger.info("[Voice Changer] warming up... generating sola buffer.")
result = np.zeros(4096).astype(np.int16)
t.record("sora")
if hasattr(self, "sola_buffer") is True and sola_offset < sola_search_frame:
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
end = -1 * (sola_search_frame - sola_offset)
@ -280,6 +284,8 @@ class VoiceChangerV2(VoiceChangerIF):
self.sola_buffer = audio[-crossfade_frame:] * self.np_prev_strength
# self.sola_buffer = audio[- crossfade_frame:]
t.record("post")
mainprocess_time = t.secs
# 後処理

View File

@ -22,6 +22,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
"beatriceModel",
"llvcModel",
"llvcConfig",
"easyVCModel",
]

View File

@ -57,7 +57,7 @@ class Timer2(object):
if self.enable is False:
return
self.maxStores = 10
self.maxStores = 1
current_frame = inspect.currentframe()
caller_frame = inspect.getouterframes(current_frame, 2)

View File

@ -16,3 +16,4 @@ class VoiceChangerParams:
crepe_onnx_tiny: str
rmvpe: str
rmvpe_onnx: str
whisper_tiny: str