WIP:client work v000

This commit is contained in:
wataru 2023-01-05 02:28:36 +09:00
parent 60db7877e2
commit e74752f548
36 changed files with 29916 additions and 12 deletions

3
.gitignore vendored

@ -9,3 +9,6 @@ server/info
server/memo.txt
client/lib/dist
client/lib/worklet/dist
client/demo/dist/

18
client/demo/.eslintrc.js Normal file

@ -0,0 +1,18 @@
module.exports = {
env: {
browser: true,
es2021: true,
node: true,
},
extends: ["eslint:recommended", "plugin:react/recommended", "plugin:@typescript-eslint/recommended"],
parser: "@typescript-eslint/parser",
parserOptions: {
ecmaFeatures: {
jsx: true,
},
ecmaVersion: 13,
sourceType: "module",
},
plugins: ["react", "@typescript-eslint"],
rules: {},
};

6
client/demo/.prettierrc Normal file

@ -0,0 +1,6 @@
{
"tabWidth": 4,
"useTabs": false,
"semi": true,
"printWidth": 360
}

15183
client/demo/package-lock.json generated Normal file

File diff suppressed because it is too large.

55
client/demo/package.json Normal file

@ -0,0 +1,55 @@
{
"name": "demo",
"version": "1.0.0",
"description": "",
"main": ".eslintrc.js",
"scripts": {
"clean": "rimraf dist/*",
"webpack:prod": "npx webpack --config webpack.prod.js",
"webpack:dev": "npx webpack --config webpack.dev.js",
"build:prod": "npm-run-all clean webpack:prod",
"build:dev": "npm-run-all clean webpack:dev",
"start": "webpack-dev-server --config webpack.dev.js",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"@babel/plugin-transform-runtime": "^7.19.6",
"@babel/preset-env": "^7.20.2",
"@babel/preset-react": "^7.18.6",
"@babel/preset-typescript": "^7.18.6",
"@types/node": "^18.11.17",
"@types/react": "^18.0.26",
"@types/react-dom": "^18.0.9",
"autoprefixer": "^10.4.13",
"babel-loader": "^9.1.0",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.7.3",
"eslint": "^8.30.0",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-prettier": "^4.2.1",
"eslint-plugin-react": "^7.31.11",
"eslint-webpack-plugin": "^3.2.0",
"html-loader": "^4.2.0",
"html-webpack-plugin": "^5.5.0",
"npm-run-all": "^4.1.5",
"postcss-loader": "^7.0.2",
"postcss-nested": "^6.0.0",
"prettier": "^2.8.1",
"rimraf": "^3.0.2",
"style-loader": "^3.3.1",
"ts-loader": "^9.4.2",
"tsconfig-paths": "^4.1.1",
"typescript": "^4.9.4",
"webpack": "^5.75.0",
"webpack-cli": "^5.0.1",
"webpack-dev-server": "^4.11.1"
},
"dependencies": {
"@dannadori/voice-changer-client-js": "file:../lib",
"react": "^18.2.0",
"react-dom": "^18.2.0"
}
}

6
client/demo/postcss.config.js Normal file

@ -0,0 +1,6 @@
module.exports = {
plugins: {
autoprefixer: {},
"postcss-nested": {},
},
};

10
client/demo/public/index.html Normal file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
</head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

1
client/demo/src/const.ts Normal file

@ -0,0 +1 @@
export const CHROME_EXTENSION = false

174
client/demo/src/css/App.css Normal file

@ -0,0 +1,174 @@
@import url("https://fonts.googleapis.com/css2?family=Chicle&family=Poppins:ital,wght@0,200;0,400;0,600;1,200;1,400;1,600&display=swap");
@import url("https://fonts.googleapis.com/css2?family=Yusei+Magic&display=swap");
:root {
--text-color: #333;
--company-color1: rgba(64, 119, 187, 1);
--company-color2: rgba(29, 47, 78, 1);
--company-color3: rgba(255, 255, 255, 1);
--company-color1-alpha: rgba(64, 119, 187, 0.3);
--company-color2-alpha: rgba(29, 47, 78, 0.3);
--company-color3-alpha: rgba(255, 255, 255, 0.3);
--global-shadow-color: rgba(0, 0, 0, 0.4);
--sidebar-transition-time: 0.3s;
--sidebar-transition-time-quick: 0.1s;
--sidebar-transition-animation: ease-in-out;
--header-height: 1.5rem;
--right-sidebar-width: 320px;
--dialog-border-color: rgba(100, 100, 100, 1);
--dialog-shadow-color: rgba(0, 0, 0, 0.3);
--dialog-background-color: rgba(255, 255, 255, 1);
--dialog-primary-color: rgba(19, 70, 209, 1);
--dialog-active-color: rgba(40, 70, 209, 1);
--dialog-input-border-color: rgba(200, 200, 200, 1);
--dialog-submit-button-color: rgba(180, 190, 230, 1);
--dialog-cancel-button-color: rgba(235, 80, 80, 1);
--body-video-seeker-height: 3rem;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: "Poppins", sans-serif;
}
html {
font-size: 16px;
}
body {
height: 100%;
width: 100%;
color: var(--text-color);
background: linear-gradient(45deg, var(--company-color1) 0, 5%, var(--company-color2) 5% 10%, var(--company-color3) 10% 80%, var(--company-color1) 80% 85%, var(--company-color2) 85% 100%);
}
#app {
height: 100%;
width: 100%;
}
.body {
height: 100%;
width: 100%;
margin: 1rem;
font-family: "Yusei Magic", sans-serif;
display: flex;
flex-direction: column;
font-size: 1rem;
overflow-y: scroll;
}
.body-row {
}
.split-4-6 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 40%;
}
& > div:nth-child(2) {
left: 40%;
width: 60%;
}
}
.split-3-7 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 30%;
}
& > div:nth-child(2) {
left: 30%;
width: 70%;
}
}
.split-3-3-4 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 30%;
}
& > div:nth-child(2) {
left: 30%;
width: 30%;
}
& > div:nth-child(3) {
left: 60%;
width: 40%;
}
}
.split-3-1-2-4 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 30%;
}
& > div:nth-child(2) {
left: 30%;
width: 10%;
}
& > div:nth-child(3) {
left: 40%;
width: 20%;
}
& > div:nth-child(4) {
left: 60%;
width: 40%;
}
}
.underline {
border-bottom: 3px solid #333;
}
.left-padding-1 {
padding-left: 1rem;
}
.left-padding-2 {
padding-left: 2rem;
}
.highlight {
background-color: rgba(200, 200, 255, 0.1);
}
.body-top-title {
font-size: 3rem;
}
.body-section-title {
font-size: 1.5rem;
color: rgb(51, 49, 49);
}
.body-item-title {
color: rgb(51, 99, 49);
}
.body-item-text {
color: rgb(30, 30, 30);
}
.body-button-container {
display: flex;
flex-direction: row;
.body-button {
user-select: none;
border: solid 1px #333;
border-radius: 2px;
padding: 2px;
}
}
.body-select-container {
.body-select {
color: rgb(30, 30, 30);
}
}

85
client/demo/src/index.tsx Normal file

@ -0,0 +1,85 @@
import * as React from "react";
import { createRoot } from "react-dom/client";
import "./css/App.css"
import { useEffect, useMemo, useRef } from "react";
import { VoiceChnagerClient } from "@dannadori/voice-changer-client-js"
import { useMicrophoneOptions } from "./options_microphone";
const container = document.getElementById("app")!;
const root = createRoot(container);
const App = () => {
const { component: microphoneSettingComponent, options: microphonOptions } = useMicrophoneOptions()
const voiceChnagerClientRef = useRef<VoiceChnagerClient | null>(null)
console.log(microphonOptions)
const onClearSettingClicked = async () => {
//@ts-ignore
await chrome.storage.local.clear();
//@ts-ignore
await chrome.storage.sync.clear();
location.reload()
}
useEffect(() => {
if (microphonOptions.audioInputDeviceId.length == 0) {
return
}
const setAudio = async () => {
const ctx = new AudioContext()
if (voiceChnagerClientRef.current) {
}
voiceChnagerClientRef.current = new VoiceChnagerClient(ctx, true, {
notifySendBufferingTime: (val: number) => { console.log(`buf:${val}`) },
notifyResponseTime: (val: number) => { console.log(`res:${val}`) },
notifyException: (mes: string) => { console.log(`error:${mes}`) }
})
await voiceChnagerClientRef.current.isInitialized()
voiceChnagerClientRef.current.setServerUrl("https://192.168.0.3:18888/test", "sio")
voiceChnagerClientRef.current.setup(microphonOptions.audioInputDeviceId, 1024)
const audio = document.getElementById("audio-output") as HTMLAudioElement
audio.srcObject = voiceChnagerClientRef.current.stream
audio.play()
}
setAudio()
}, [microphonOptions.audioInputDeviceId])
const clearRow = useMemo(() => {
return (
<>
<div className="body-row split-3-3-4 left-padding-1 highlight">
<div className="body-item-title">Clear Setting</div>
<div className="body-item-text"></div>
<div className="body-button-container">
<div className="body-button" onClick={onClearSettingClicked}>clear</div>
</div>
</div>
</>
)
}, [])
return (
<div className="body">
<div className="body-row">
<div className="body-top-title">
Voice Changer Setting
</div>
</div>
{clearRow}
{microphoneSettingComponent}
<div>
<audio id="audio-output"></audio>
</div>
</div>
)
}
root.render(
<App></App>
);

322
client/demo/src/options_microphone.tsx Normal file

@ -0,0 +1,322 @@
import * as React from "react";
import { useEffect, useMemo, useState } from "react";
import { CHROME_EXTENSION } from "./const";
import { Speaker, VoiceChangerMode, DefaultSpeakders, SampleRate, BufferSize } from "@dannadori/voice-changer-client-js"
export type MicrophoneOptionsState = {
audioInputDeviceId: string,
mmvcServerUrl: string,
sampleRate: number,
bufferSize: number,
chunkSize: number,
speakers: Speaker[],
srcId: number,
dstId: number,
vfEnabled: boolean,
voiceChangerMode: VoiceChangerMode,
gpu: number,
crossFadeLowerValue: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
}
const InitMicrophoneOptionsState = {
audioInputDeviceId: "",
mmvcServerUrl: "https://localhost:5543/test",
sampleRate: 48000,
bufferSize: 1024,
chunkSize: 24,
speakers: DefaultSpeakders,
srcId: 107,
dstId: 100,
vfEnabled: true,
voiceChangerMode: VoiceChangerMode.realtime,
gpu: 0,
crossFadeLowerValue: 0.1,
crossFadeOffsetRate: 0.3,
crossFadeEndRate: 0.6,
} as const
const reloadDevices = async () => {
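// Request media access first: enumerateDevices() only returns populated device labels after the user has granted permission.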
try {
await navigator.mediaDevices.getUserMedia({ video: true, audio: true });
} catch (e) {
console.warn("Enumerate device error::", e)
}
const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();
return mediaDeviceInfos.filter(x => { return x.kind == "audioinput" })
}
export type MicrophoneOptionsComponent = {
component: JSX.Element,
options: MicrophoneOptionsState
}
export const useMicrophoneOptions = (): MicrophoneOptionsComponent => {
// GUI Info
const [audioDeviceInfo, setAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [editSpeakerTargetId, setEditSpeakerTargetId] = useState<number>(0)
const [editSpeakerTargetName, setEditSpeakerTargetName] = useState<string>("")
const [options, setOptions] = useState<MicrophoneOptionsState>(InitMicrophoneOptionsState)
useEffect(() => {
const initialize = async () => {
const audioInfo = await reloadDevices()
setAudioDeviceInfo(audioInfo)
if (CHROME_EXTENSION) {
//@ts-ignore
const storedOptions = await chrome.storage.local.get("microphoneOptions")
if (storedOptions) {
setOptions(storedOptions)
}
}
}
initialize()
}, [])
useEffect(() => {
const storeOptions = async () => {
if (CHROME_EXTENSION) {
// @ts-ignore
await chrome.storage.local.set({ microphoneOptions: options })
}
}
storeOptions()
}, [options]) // Moving this above the load effect might wipe the stored values, so it may be a bad idea. Needs verification.
const setAudioInputDeviceId = async (deviceId: string) => {
setOptions({ ...options, audioInputDeviceId: deviceId })
}
const onSetServerClicked = async () => {
const input = document.getElementById("mmvc-server-url") as HTMLInputElement
setOptions({ ...options, mmvcServerUrl: input.value })
}
const onSampleRateChanged = async (val: SampleRate) => {
setOptions({ ...options, sampleRate: val })
}
const onBufferSizeChanged = async (val: BufferSize) => {
setOptions({ ...options, bufferSize: val })
}
const onChunkSizeChanged = async (val: number) => {
setOptions({ ...options, chunkSize: val })
}
const onSrcIdChanged = async (val: number) => {
setOptions({ ...options, srcId: val })
}
const onDstIdChanged = async (val: number) => {
setOptions({ ...options, dstId: val })
}
const onSetSpeakerMappingClicked = async () => {
const targetId = editSpeakerTargetId
const targetName = editSpeakerTargetName
const targetSpeaker = options.speakers.find(x => { return x.id == targetId })
if (targetSpeaker) {
if (targetName.length == 0) { // Delete
const newSpeakers = options.speakers.filter(x => { return x.id != targetId })
options.speakers = newSpeakers
} else { // Update
targetSpeaker.name = targetName
}
} else {
if (targetName.length == 0) { // Noop
} else {// add
options.speakers.push({
id: targetId,
name: targetName
})
}
}
setOptions({ ...options })
}
const onVfEnabledChange = async (val: boolean) => {
setOptions({ ...options, vfEnabled: val })
}
const onVoiceChangeModeChanged = async (val: VoiceChangerMode) => {
setOptions({ ...options, voiceChangerMode: val })
}
const onGpuChanged = async (val: number) => {
setOptions({ ...options, gpu: val })
}
const onCrossFadeLowerValueChanged = async (val: number) => {
setOptions({ ...options, crossFadeLowerValue: val })
}
const onCrossFadeOffsetRateChanged = async (val: number) => {
setOptions({ ...options, crossFadeOffsetRate: val })
}
const onCrossFadeEndRateChanged = async (val: number) => {
setOptions({ ...options, crossFadeEndRate: val })
}
const settings = useMemo(() => {
return (
<>
<div className="body-row left-padding-1">
<div className="body-section-title">Virtual Microphone</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Microphone</div>
<div className="body-select-container">
<select className="body-select" onChange={(e) => { setAudioInputDeviceId(e.target.value) }}>
{
audioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1">
<div className="body-item-title">MMVC Server</div>
<div className="body-input-container">
<input type="text" defaultValue={options.mmvcServerUrl} id="mmvc-server-url" />
</div>
<div className="body-button-container">
<div className="body-button" onClick={onSetServerClicked}>set</div>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Sample Rate</div>
<div className="body-select-container">
<select className="body-select" value={options.sampleRate} onChange={(e) => { onSampleRateChanged(Number(e.target.value) as SampleRate) }}>
{
Object.values(SampleRate).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Buffer Size</div>
<div className="body-select-container">
<select className="body-select" value={options.bufferSize} onChange={(e) => { onBufferSizeChanged(Number(e.target.value) as BufferSize) }}>
{
Object.values(BufferSize).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Chunk Size</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={options.chunkSize} onChange={(e) => { onChunkSizeChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Source Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={options.srcId} onChange={(e) => { onSrcIdChanged(Number(e.target.value)) }}>
{
options.speakers.map(x => {
return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Destination Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={options.dstId} onChange={(e) => { onDstIdChanged(Number(e.target.value)) }}>
{
options.speakers.map(x => {
return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-1-2-4 left-padding-1 highlight">
<div className="body-item-title">Edit Speaker Mapping</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={editSpeakerTargetId} onChange={(e) => {
const id = Number(e.target.value)
setEditSpeakerTargetId(id)
setEditSpeakerTargetName(options.speakers.find(x => { return x.id == id })?.name || "")
}} />
</div>
<div className="body-input-container">
<input type="text" value={editSpeakerTargetName} onChange={(e) => { setEditSpeakerTargetName(e.target.value) }} />
</div>
<div className="body-button-container">
<div className="body-button" onClick={onSetSpeakerMappingClicked}>set</div>
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1 highlight">
<div className="body-item-title">VF Enabled</div>
<div>
<input type="checkbox" checked={options.vfEnabled} onChange={(e) => onVfEnabledChange(e.target.checked)} />
</div>
<div className="body-button-container">
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Voice Change Mode</div>
<div className="body-select-container">
<select className="body-select" value={options.voiceChangerMode} onChange={(e) => { onVoiceChangeModeChanged(e.target.value as VoiceChangerMode) }}>
{
Object.values(VoiceChangerMode).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">GPU</div>
<div className="body-input-container">
<input type="number" min={-1} max={5} step={1} value={options.gpu} onChange={(e) => { onGpuChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Cross Fade Lower Val</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={options.crossFadeLowerValue} onChange={(e) => { onCrossFadeLowerValueChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Cross Fade Offset Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={options.crossFadeOffsetRate} onChange={(e) => { onCrossFadeOffsetRateChanged(Number(e.target.value)) }} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 highlight">
<div className="body-item-title">Cross Fade End Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={options.crossFadeEndRate} onChange={(e) => { onCrossFadeEndRateChanged(Number(e.target.value)) }} />
</div>
</div>
</>
)
}, [audioDeviceInfo, editSpeakerTargetId, editSpeakerTargetName, options])
return {
component: settings,
options: options
}
}

33
client/demo/tsconfig.json Normal file

@ -0,0 +1,33 @@
{
"compilerOptions": {
"target": "ES2020",
"jsx": "react",
"lib": ["dom"],
/* */
"forceConsistentCasingInFileNames": true,
/* */
"strict": true,
"noImplicitAny": true,
"strictNullChecks": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noImplicitReturns": true,
/* Module */
"moduleResolution": "node",
"esModuleInterop": true,
"isolatedModules": true,
"allowSyntheticDefaultImports": true,
/* */
// "noEmit": true,
/* For avoid WebGL2 error */
/* https://stackoverflow.com/questions/52846622/error-ts2430-interface-webglrenderingcontext-incorrectly-extends-interface-w */
"skipLibCheck": true
},
/* tsc */
"include": ["src/**/*.ts", "src/**/*.tsx"],
"exclude": ["node_modules"]
}

45
client/demo/webpack.common.js Normal file

@ -0,0 +1,45 @@
const path = require("path");
const HtmlWebpackPlugin = require("html-webpack-plugin");
const CopyPlugin = require("copy-webpack-plugin");
module.exports = {
mode: "production",
entry: "./src/index.tsx",
resolve: {
extensions: [".ts", ".tsx", ".js"],
},
module: {
rules: [
{
test: [/\.ts$/, /\.tsx$/],
use: [
{
loader: "babel-loader",
options: {
presets: ["@babel/preset-env", "@babel/preset-react", "@babel/preset-typescript"],
plugins: ["@babel/plugin-transform-runtime"],
},
},
],
},
{
test: /\.html$/,
loader: "html-loader",
},
{
test: /\.css$/,
use: ["style-loader", { loader: "css-loader", options: { importLoaders: 1 } }, "postcss-loader"],
},
],
},
output: {
filename: "index.js",
path: path.resolve(__dirname, "dist"),
},
plugins: [
new HtmlWebpackPlugin({
template: path.resolve(__dirname, "public/index.html"),
filename: "./index.html",
}),
]
};

20
client/demo/webpack.dev.js Normal file

@ -0,0 +1,20 @@
const path = require("path");
const { merge } = require('webpack-merge');
const common = require('./webpack.common.js')
module.exports = merge(common, {
mode: 'development',
devServer: {
static: {
directory: path.join(__dirname, "public"),
},
client: {
overlay: {
errors: false,
warnings: false,
},
},
host: "0.0.0.0",
https: true,
},
})

6
client/demo/webpack.prod.js Normal file

@ -0,0 +1,6 @@
const { merge } = require('webpack-merge');
const common = require('./webpack.common.js')
module.exports = merge(common, {
mode: 'production',
})

12951
client/lib/package-lock.json generated Normal file

File diff suppressed because it is too large.

50
client/lib/package.json Normal file

@ -0,0 +1,50 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.0",
"description": "",
"main": "dist/index.js",
"directories": {
"lib": "lib"
},
"scripts": {
"clean:worklet": "rimraf worklet/dist/*",
"webpack:worklet:dev": "webpack --config webpack.worklet.dev.js",
"webpack:worklet:prod": "webpack --config webpack.worklet.prod.js",
"build:worklet:dev": "npm-run-all clean:worklet webpack:worklet:dev",
"build:worklet:prod": "npm-run-all clean:worklet webpack:worklet:prod",
"clean": "rimraf dist/*",
"webpack:dev": "webpack --config webpack.dev.js",
"webpack:prod": "webpack --config webpack.prod.js",
"build:dev": "npm-run-all build:worklet:dev clean webpack:dev",
"build:prod": "npm-run-all build:worklet:prod clean webpack:prod",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"@types/audioworklet": "^0.0.36",
"@types/node": "^18.11.17",
"eslint": "^8.30.0",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-prettier": "^4.2.1",
"eslint-plugin-react": "^7.31.11",
"eslint-webpack-plugin": "^3.2.0",
"npm-run-all": "^4.1.5",
"prettier": "^2.8.1",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"ts-loader": "^9.4.2",
"typescript": "^4.9.4",
"webpack": "^5.75.0",
"webpack-cli": "^5.0.1",
"webpack-dev-server": "^4.11.1"
},
"dependencies": {
"@types/readable-stream": "^2.3.15",
"amazon-chime-sdk-js": "^3.9.0",
"microphone-stream": "^6.0.1",
"readable-stream": "^4.3.0",
"socket.io-client": "^4.5.4"
}
}

277
client/lib/src/AudioStreamer.ts Normal file

@ -0,0 +1,277 @@
import { io, Socket } from "socket.io-client";
import { DefaultEventsMap } from "@socket.io/component-emitter";
import { Duplex, DuplexOptions } from "readable-stream";
import { DefaultVoiceChangerRequestParamas, MajarModeTypes, VoiceChangerMode, VoiceChangerRequestParamas } from "./const";
export type Callbacks = {
onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
}
export type AudioStreamerListeners = {
notifySendBufferingTime: (time: number) => void
notifyResponseTime: (time: number) => void
notifyException: (message: string) => void
}
export class AudioStreamer extends Duplex {
private callbacks: Callbacks
private audioStreamerListeners: AudioStreamerListeners
private majarMode: MajarModeTypes
private serverUrl = ""
private socket: Socket<DefaultEventsMap, DefaultEventsMap> | null = null
private voiceChangerMode: VoiceChangerMode = "realtime"
private requestParamas: VoiceChangerRequestParamas = DefaultVoiceChangerRequestParamas
private chunkNum = 8
private requestChunks: ArrayBuffer[] = []
private recordChunks: ArrayBuffer[] = []
private isRecording = false
// performance monitor
private bufferStart = 0;
constructor(majarMode: MajarModeTypes, callbacks: Callbacks, audioStreamerListeners: AudioStreamerListeners, options?: DuplexOptions) {
super(options);
this.majarMode = majarMode
this.callbacks = callbacks
this.audioStreamerListeners = audioStreamerListeners
}
private createSocketIO = () => {
if (this.socket) {
this.socket.close()
}
if (this.majarMode === "sio") {
this.socket = io(this.serverUrl);
this.socket.on('connect', () => console.log(`[SIO] connect to ${this.serverUrl}`));
this.socket.on('response', (response: any[]) => {
const cur = Date.now()
const responseTime = cur - response[0]
const result = response[1] as ArrayBuffer
if (result.byteLength < 128 * 2) {
this.audioStreamerListeners.notifyException(`[SIO] received data is too short ${result.byteLength}`)
} else {
this.audioStreamerListeners.notifyException(``)
this.callbacks.onVoiceReceived(this.voiceChangerMode, response[1])
this.audioStreamerListeners.notifyResponseTime(responseTime)
}
});
}
}
// Option Change
setServerUrl = (serverUrl: string, mode: MajarModeTypes) => {
this.serverUrl = serverUrl
this.majarMode = mode
window.open(serverUrl, '_blank')
console.log(`[AudioStreamer] Server Setting:${this.serverUrl} ${this.majarMode}`)
this.createSocketIO()// mode check is done in the method.
}
setRequestParams = (val: VoiceChangerRequestParamas) => {
this.requestParamas = val
}
setChunkNum = (num: number) => {
this.chunkNum = num
}
setVoiceChangerMode = (val: VoiceChangerMode) => {
this.voiceChangerMode = val
}
// Main Process
//// Pipe from mic stream
_write = (chunk: AudioBuffer, _encoding: any, callback: any) => {
const buffer = chunk.getChannelData(0);
// console.log("SAMPLERATE:", chunk.sampleRate, chunk.numberOfChannels, chunk.length, buffer)
if (this.voiceChangerMode === "realtime") {
this._write_realtime(buffer)
} else {
this._write_record(buffer)
}
callback();
}
private _write_realtime = (buffer: Float32Array) => {
// bufferSize samples of 48kHz audio arrive per call.
//// Input comes in at 48000Hz, so decimate it to 24000Hz.
//// Byte size: x1/2 from the rate conversion, x2 from 16bit (2byte) samples.
const arrayBuffer = new ArrayBuffer((buffer.length / 2) * 2)
const dataView = new DataView(arrayBuffer);
for (let i = 0; i < buffer.length; i++) {
if (i % 2 == 0) {
let s = Math.max(-1, Math.min(1, buffer[i]));
s = s < 0 ? s * 0x8000 : s * 0x7FFF
// advances 2 bytes per second sample, hence ((i / 2) * 2)
dataView.setInt16((i / 2) * 2, s, true);
}
}
// Manage the data in 256-byte chunks (minimum buffer size 256, decimated count x 2 bytes)
const chunkByteSize = 256 // (const.ts ★1)
for (let i = 0; i < arrayBuffer.byteLength / chunkByteSize; i++) {
const ab = arrayBuffer.slice(i * chunkByteSize, (i + 1) * chunkByteSize)
this.requestChunks.push(ab)
}
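// Illustrative arithmetic: with bufferSize = 1024 samples at 48kHz, decimation leaves 512 int16 samples = 1024 bytes, i.e. 4 chunks of 256 bytes per _write call.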
//// Stop here if the buffered chunk count has not yet reached the per-request send count.
if (this.requestChunks.length < this.chunkNum) {
return
}
// Allocate the container for the request
const windowByteLength = this.requestChunks.reduce((prev, cur) => {
return prev + cur.byteLength
}, 0)
const newBuffer = new Uint8Array(windowByteLength);
// Copy the request data into it
this.requestChunks.reduce((prev, cur) => {
newBuffer.set(new Uint8Array(cur), prev)
return prev + cur.byteLength
}, 0)
console.log("send buff length", newBuffer.length)
this.sendBuffer(newBuffer)
this.requestChunks = []
this.audioStreamerListeners.notifySendBufferingTime(Date.now() - this.bufferStart)
this.bufferStart = Date.now()
}
private _write_record = (buffer: Float32Array) => {
if (!this.isRecording) { return }
// buffer length (48kHz) x 2 bytes (16bit), / 2 for decimation to 24kHz
const sendBuffer = new ArrayBuffer(buffer.length * 2 / 2);
const sendDataView = new DataView(sendBuffer);
for (var i = 0; i < buffer.length; i++) {
if (i % 2 == 0) {
let s = Math.max(-1, Math.min(1, buffer[i]));
s = s < 0 ? s * 0x8000 : s * 0x7FFF
sendDataView.setInt16(i, s, true);
// if (i % 3000 === 0) {
// console.log("buffer_converting", s, buffer[i])
// }
}
}
this.recordChunks.push(sendBuffer)
}
// Trigger for near-realtime mode
sendRecordedData = () => {
const length = this.recordChunks.reduce((prev, cur) => {
return prev + cur.byteLength
}, 0)
const newBuffer = new Uint8Array(length);
this.recordChunks.reduce((prev, cur) => {
newBuffer.set(new Uint8Array(cur), prev)
return prev + cur.byteLength
}, 0)
this.sendBuffer(newBuffer)
}
startRecord = () => {
this.recordChunks = []
this.isRecording = true
}
stopRecord = () => {
this.isRecording = false
}
private sendBuffer = async (newBuffer: Uint8Array) => {
if (this.serverUrl.length == 0) {
console.error("no server url")
throw "no server url"
}
const timestamp = Date.now()
// console.log("REQUEST_MESSAGE:", [this.gpu, this.srcId, this.dstId, timestamp, newBuffer.buffer])
console.log("SERVER_URL", this.serverUrl, this.majarMode)
const convertChunkNum = this.voiceChangerMode === "realtime" ? this.requestParamas.convertChunkNum : 0
if (this.majarMode === "sio") {
if (!this.socket) {
console.warn(`sio is not initialized`)
return
}
console.log("emit!")
this.socket.emit('request_message', [
this.requestParamas.gpu,
this.requestParamas.srcId,
this.requestParamas.dstId,
timestamp,
convertChunkNum,
this.requestParamas.crossFadeLowerValue,
this.requestParamas.crossFadeOffsetRate,
this.requestParamas.crossFadeEndRate,
newBuffer.buffer]);
} else {
const res = await postVoice(
this.serverUrl,
this.requestParamas.gpu,
this.requestParamas.srcId,
this.requestParamas.dstId,
timestamp,
convertChunkNum,
this.requestParamas.crossFadeLowerValue,
this.requestParamas.crossFadeOffsetRate,
this.requestParamas.crossFadeEndRate,
newBuffer.buffer)
if (res.byteLength < 128 * 2) {
this.audioStreamerListeners.notifyException(`[REST] received data is too short ${res.byteLength}`)
} else {
this.audioStreamerListeners.notifyException(``)
this.callbacks.onVoiceReceived(this.voiceChangerMode, res)
this.audioStreamerListeners.notifyResponseTime(Date.now() - timestamp)
}
}
}
}
export const postVoice = async (
url: string,
gpu: number,
srcId: number,
dstId: number,
timestamp: number,
convertSize: number,
crossFadeLowerValue: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
buffer: ArrayBuffer) => {
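// The audio bytes are sent base64-encoded in a JSON body; the response is expected to carry the converted voice as base64 under "changedVoiceBase64".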
const obj = {
gpu,
srcId,
dstId,
timestamp,
convertSize,
crossFadeLowerValue,
crossFadeOffsetRate,
crossFadeEndRate,
buffer: Buffer.from(buffer).toString('base64')
};
const body = JSON.stringify(obj);
const res = await fetch(`${url}`, {
method: "POST",
headers: {
'Accept': 'application/json',
'Content-Type': 'application/json'
},
body: body
})
const receivedJson = await res.json()
const changedVoiceBase64 = receivedJson["changedVoiceBase64"]
const buf = Buffer.from(changedVoiceBase64, "base64")
const ab = new ArrayBuffer(buf.length);
// console.log("RECIV", buf.length)
const view = new Uint8Array(ab);
for (let i = 0; i < buf.length; ++i) {
view[i] = buf[i];
}
return ab
}

159
client/lib/src/VoiceChangerClient.ts Normal file

@ -0,0 +1,159 @@
import { VoiceChangerWorkletNode } from "./VoiceChangerWorkletNode";
// @ts-ignore
import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream } from "./util";
import { BufferSize, MajarModeTypes, VoiceChangerMode, VoiceChangerRequestParamas } from "./const";
import MicrophoneStream from "microphone-stream";
import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
// Audio data flow
// input node (mic or MediaStream) -> [vf node] -> microphone stream -> audio streamer ->
// sio/rest server -> audio streamer -> vc node -> output node
export class VoiceChnagerClient {
private ctx: AudioContext
private vfEnable = false
private vf: VoiceFocusDeviceTransformer | null = null
private currentDevice: VoiceFocusTransformDevice | null = null
private currentMediaStream: MediaStream | null = null
private currentMediaStreamAudioSourceNode: MediaStreamAudioSourceNode | null = null
private outputNodeFromVF: MediaStreamAudioDestinationNode | null = null
private micStream: MicrophoneStream | null = null
private audioStreamer!: AudioStreamer
private vcNode!: VoiceChangerWorkletNode
private currentMediaStreamAudioDestinationNode!: MediaStreamAudioDestinationNode
private promiseForInitialize: Promise<void>
private callbacks: Callbacks = {
onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer): void => {
console.log(voiceChangerMode, data)
if (voiceChangerMode === "realtime") {
this.vcNode.postReceivedVoice(data)
return
}
// For Near Realtime Mode
console.log("near realtime mode")
const i16Data = new Int16Array(data)
const f32Data = new Float32Array(i16Data.length)
// https://stackoverflow.com/questions/35234551/javascript-converting-from-int16-to-float32
i16Data.forEach((x, i) => {
const float = (x >= 0x8000) ? -(0x10000 - x) / 0x8000 : x / 0x7FFF;
f32Data[i] = float
})
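// Play the received 24kHz mono PCM once by wrapping it in an AudioBuffer routed straight to the output destination node.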
const source = this.ctx.createBufferSource();
const buffer = this.ctx.createBuffer(1, f32Data.length, 24000);
buffer.getChannelData(0).set(f32Data);
source.buffer = buffer;
source.start();
source.connect(this.currentMediaStreamAudioDestinationNode)
}
}
constructor(ctx: AudioContext, vfEnable: boolean, audioStreamerListeners: AudioStreamerListeners) {
this.ctx = ctx
this.vfEnable = vfEnable
this.promiseForInitialize = new Promise<void>(async (resolve) => {
const scriptUrl = URL.createObjectURL(new Blob([workerjs], { type: "text/javascript" }));
await this.ctx.audioWorklet.addModule(scriptUrl)
this.vcNode = new VoiceChangerWorkletNode(this.ctx); // vc node
this.currentMediaStreamAudioDestinationNode = this.ctx.createMediaStreamDestination() // output node
this.vcNode.connect(this.currentMediaStreamAudioDestinationNode) // vc node -> output node
// (data is fed into the vc node by the audio streamer's callback)
this.audioStreamer = new AudioStreamer("sio", this.callbacks, audioStreamerListeners, { objectMode: true, })
if (this.vfEnable) {
this.vf = await VoiceFocusDeviceTransformer.create({ variant: 'c20' })
const dummyMediaStream = createDummyMediaStream(this.ctx)
this.currentDevice = (await this.vf.createTransformDevice(dummyMediaStream)) || null;
this.outputNodeFromVF = this.ctx.createMediaStreamDestination();
}
resolve()
})
}
isInitialized = async () => {
if (this.promiseForInitialize) {
await this.promiseForInitialize
}
return true
}
// forceVfDisable lets callers bypass voice focus even when vf was enabled in the constructor.
setup = async (input: string | MediaStream, bufferSize: BufferSize, forceVfDisable: boolean = false) => {
// condition check
if (!this.vcNode) {
console.warn("vc node is not initialized.")
throw "vc node is not initialized."
}
// Main Process
//// shutdown & re-generate mediastream
if (this.currentMediaStream) {
this.currentMediaStream.getTracks().forEach(x => { x.stop() })
this.currentMediaStream = null
}
if (typeof input == "string") {
this.currentMediaStream = await navigator.mediaDevices.getUserMedia({
audio: { deviceId: input }
})
} else {
this.currentMediaStream = input
}
// create mic stream
this.micStream = new MicrophoneStream({
objectMode: true,
bufferSize: bufferSize,
context: this.ctx
})
// connect nodes.
if (this.currentDevice && forceVfDisable == false) {
this.currentMediaStreamAudioSourceNode = this.ctx.createMediaStreamSource(this.currentMediaStream) // input node
this.currentDevice.chooseNewInnerDevice(this.currentMediaStream)
const voiceFocusNode = await this.currentDevice.createAudioNode(this.ctx); // vf node
this.currentMediaStreamAudioSourceNode.connect(voiceFocusNode.start) // input node -> vf node
voiceFocusNode.end.connect(this.outputNodeFromVF!)
this.micStream.setStream(this.outputNodeFromVF!.stream) // vf node -> mic stream
} else {
this.micStream.setStream(this.currentMediaStream) // input device -> mic stream
}
this.micStream.pipe(this.audioStreamer!) // mic stream -> audio streamer
}
get stream(): MediaStream {
return this.currentMediaStreamAudioDestinationNode.stream
}
// Audio Streamer Settings
setServerUrl = (serverUrl: string, mode: MajarModeTypes) => {
this.audioStreamer.setServerUrl(serverUrl, mode)
}
setRequestParams = (val: VoiceChangerRequestParamas) => {
this.audioStreamer.setRequestParams(val)
}
setChunkNum = (num: number) => {
this.audioStreamer.setChunkNum(num)
}
setVoiceChangerMode = (val: VoiceChangerMode) => {
this.audioStreamer.setVoiceChangerMode(val)
}
}

17
client/lib/src/VoiceChangerWorkletNode.ts Normal file

@ -0,0 +1,17 @@
export class VoiceChangerWorkletNode extends AudioWorkletNode {
constructor(context: AudioContext) {
super(context, "voice-changer-worklet-processor");
this.port.onmessage = this.handleMessage.bind(this);
console.log(`[worklet_node][voice-changer-worklet-processor] created.`);
}
postReceivedVoice = (data: ArrayBuffer) => {
this.port.postMessage({
data: data,
}, [data]);
}
handleMessage(event: any) {
console.log(`[Node:handleMessage_] `, event.data.volume);
}
}

99
client/lib/src/const.ts Normal file

@ -0,0 +1,99 @@
// (★1) chunk size is defined as 128 samples = 256 bytes (int16).
// (★2) 256 bytes (the decimated sample count from the minimum buffer size of 256, x 2 bytes) is managed as one chunk.
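// Illustrative arithmetic: the default convertChunkNum of 12 corresponds to 12 x 128 = 1536 samples, i.e. 64ms at the 24kHz rate used after decimation.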
// types
export type VoiceChangerRequestParamas = {
convertChunkNum: number,
srcId: number,
dstId: number,
gpu: number,
crossFadeLowerValue: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
}
export type VoiceChangerRequest = VoiceChangerRequestParamas & {
data: ArrayBuffer,
timestamp: number
}
export type VoiceChangerOptions = {
audioInputDeviceId: string | null,
mediaStream: MediaStream | null,
mmvcServerUrl: string,
sampleRate: SampleRate, // 48000Hz
bufferSize: BufferSize, // 256, 512, 1024, 2048, 4096, 8192, 16384 (for mic stream)
chunkNum: number, // n of (256 x n) for send buffer
speakers: Speaker[],
forceVfDisable: boolean,
voiceChangerMode: VoiceChangerMode,
}
export type Speaker = {
"id": number,
"name": string,
}
// Consts
export const MajarModeTypes = {
"sio": "sio",
"rest": "rest",
} as const
export type MajarModeTypes = typeof MajarModeTypes[keyof typeof MajarModeTypes]
export const VoiceChangerMode = {
"realtime": "realtime",
"near-realtime": "near-realtime",
} as const
export type VoiceChangerMode = typeof VoiceChangerMode[keyof typeof VoiceChangerMode]
export const SampleRate = {
"48000": 48000,
} as const
export type SampleRate = typeof SampleRate[keyof typeof SampleRate]
export const BufferSize = {
"1024": 1024,
} as const
export type BufferSize = typeof BufferSize[keyof typeof BufferSize]
// Defaults
export const DefaultVoiceChangerRequestParamas: VoiceChangerRequestParamas = {
convertChunkNum: 12, //(★1)
srcId: 107,
dstId: 100,
gpu: 0,
crossFadeLowerValue: 0.1,
crossFadeOffsetRate: 0.3,
crossFadeEndRate: 0.6
}
export const DefaultSpeakders: Speaker[] = [
{
"id": 100,
"name": "ずんだもん"
},
{
"id": 107,
"name": "user"
},
{
"id": 101,
"name": "そら"
},
{
"id": 102,
"name": "めたん"
},
{
"id": 103,
"name": "つむぎ"
}
]

2
client/lib/src/index.ts Normal file

@ -0,0 +1,2 @@
export * from "./const"
export * from "./VoiceChangerClient"

12
client/lib/src/util.ts Normal file

@ -0,0 +1,12 @@
export const createDummyMediaStream = (audioContext: AudioContext) => {
const dummyOutputNode = audioContext.createMediaStreamDestination();
const gainNode = audioContext.createGain();
gainNode.gain.value = 0.0;
gainNode.connect(dummyOutputNode);
const oscillatorNode = audioContext.createOscillator();
oscillatorNode.frequency.value = 440;
oscillatorNode.connect(gainNode);
oscillatorNode.start();
return dummyOutputNode.stream;
};

34
client/lib/tsconfig.json Normal file

@ -0,0 +1,34 @@
{
"compilerOptions": {
"target": "ES2020",
"declaration": true,
"outDir": "./dist",
/* */
"forceConsistentCasingInFileNames": true,
/* */
"strict": true,
"noImplicitAny": true,
"strictNullChecks": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noImplicitReturns": true,
/* Module */
"moduleResolution": "node",
"esModuleInterop": true,
// "isolatedModules": true,
"allowSyntheticDefaultImports": true,
// /* */
// "noEmit": true,
/* For avoid WebGL2 error */
/* https://stackoverflow.com/questions/52846622/error-ts2430-interface-webglrenderingcontext-incorrectly-extends-interface-w */
"skipLibCheck": true
},
/* tsc */
"include": ["src/*.ts"],
"exclude": ["node_modules"]
}

31
client/lib/tsconfig.worklet.json Normal file

@ -0,0 +1,31 @@
{
"compilerOptions": {
"target": "ES2020",
"lib":["ES2020"],
/* */
"forceConsistentCasingInFileNames": true,
/* */
"strict": true,
"noImplicitAny": true,
"strictNullChecks": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noImplicitReturns": true,
/* Module */
"moduleResolution": "node",
"esModuleInterop": true,
// "isolatedModules": true,
"allowSyntheticDefaultImports": true,
// /* */
// "noEmit": true,
/* For avoid WebGL2 error */
/* https://stackoverflow.com/questions/52846622/error-ts2430-interface-webglrenderingcontext-incorrectly-extends-interface-w */
"skipLibCheck": true
},
/* tsc */
"include": ["worklet/src/*.ts"],
"exclude": ["node_modules"]
}

34
client/lib/webpack.common.js Normal file

@ -0,0 +1,34 @@
const path = require("path");
const webpack = require("webpack");
module.exports = {
entry: "./src/index.ts",
resolve: {
extensions: [".ts", ".js"],
},
module: {
rules: [
{
test: [/\.ts$/, /\.tsx$/],
use: [
{
loader: "ts-loader",
options: {
configFile: "tsconfig.json",
},
},
],
},
],
},
output: {
filename: "index.js",
path: path.resolve(__dirname, "dist"),
libraryTarget: "umd",
globalObject: "typeof self !== 'undefined' ? self : this",
},
plugins: [
new webpack.ProvidePlugin({
process: "process/browser",
}),
],
};

6
client/lib/webpack.dev.js Normal file

@ -0,0 +1,6 @@
const { merge } = require('webpack-merge');
const common = require('./webpack.common.js')
module.exports = merge(common, {
mode: 'development',
})

6
client/lib/webpack.prod.js Normal file

@ -0,0 +1,6 @@
const { merge } = require('webpack-merge');
const common = require('./webpack.common.js')
module.exports = merge(common, {
mode: 'production',
})

31
client/lib/webpack.worklet.common.js Normal file

@ -0,0 +1,31 @@
/* eslint @typescript-eslint/no-var-requires: "off" */
const path = require("path");
module.exports = {
// mode: "development",
mode: "production",
entry: path.resolve(__dirname, "worklet/src/voice-changer-worklet-processor.ts"),
output: {
path: path.resolve(__dirname, "worklet/dist"),
filename: "index.js",
},
resolve: {
modules: [path.resolve(__dirname, "node_modules")],
extensions: [".ts", ".js"],
},
module: {
rules: [
{
test: [/\.ts$/, /\.tsx$/],
use: [
{
loader: "ts-loader",
options: {
configFile: "tsconfig.worklet.json",
},
},
],
},
],
}
};

7
client/lib/webpack.worklet.dev.js Normal file

@ -0,0 +1,7 @@
const { merge } = require('webpack-merge');
const common = require('./webpack.worklet.common.js')
const worklet = merge(common, {
mode: 'development',
})
module.exports = [worklet];

8
client/lib/webpack.worklet.prod.js Normal file

@ -0,0 +1,8 @@
const { merge } = require('webpack-merge');
const common = require('./webpack.worklet.common.js')
const worklet = merge(common, {
mode: 'production',
})
module.exports = [worklet];

82
client/lib/worklet/src/voice-changer-worklet-processor.ts Normal file

@ -0,0 +1,82 @@
class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
private BLOCK_SIZE = 128
private initialized = false;
private volume = 0
playBuffer: Float32Array[] = []
/**
* @constructor
*/
constructor() {
super();
this.initialized = true;
this.port.onmessage = this.handleMessage.bind(this);
}
handleMessage(event: any) {
const arrayBuffer = event.data.data as ArrayBuffer
// Data is received as int16
const i16Data = new Int16Array(arrayBuffer)
const f32Data = new Float32Array(i16Data.length)
console.log(`[worklet] f32DataLength${f32Data.length} i16DataLength${i16Data.length}`)
i16Data.forEach((x, i) => {
const float = (x >= 0x8000) ? -(0x10000 - x) / 0x8000 : x / 0x7FFF;
f32Data[i] = float
})
if (this.playBuffer.length > 50) {
console.log("[worklet] Buffer truncated")
while (this.playBuffer.length > 2) {
this.playBuffer.shift()
}
}
// Upsample and accumulate into the play buffer
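// Each 24kHz sample yields two 48kHz frames: the sample itself plus a linear interpolation with the next sample, collected into 128-frame blocks (one render quantum).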
let f32Block: Float32Array
for (let i = 0; i < f32Data.length; i++) {
const frameIndexInBlock = (i * 2) % this.BLOCK_SIZE
if (frameIndexInBlock === 0) {
f32Block = new Float32Array(this.BLOCK_SIZE)
}
const currentFrame = f32Data[i]
const nextFrame = i + 1 < f32Data.length ? f32Data[i + 1] : f32Data[i]
f32Block![frameIndexInBlock] = currentFrame
f32Block![frameIndexInBlock + 1] = (currentFrame + nextFrame) / 2
if (f32Block!.length === frameIndexInBlock + 2) {
this.playBuffer.push(f32Block!)
}
}
}
process(_inputs: Float32Array[][], outputs: Float32Array[][], _parameters: Record<string, Float32Array>) {
if (!this.initialized) {
console.warn("[worklet] worklet_process not ready");
return true;
}
if (this.playBuffer.length === 0) {
console.log("[worklet] no play buffer")
return true
}
const data = this.playBuffer.shift()!
const sum = data.reduce((prev, cur) => {
return prev + cur * cur
}, 0)
const rms = Math.sqrt(sum / data.length)
this.volume = Math.max(rms, this.volume * 0.95)
this.port.postMessage({ volume: this.volume });
outputs[0][0].set(data)
return true;
}
}
registerProcessor("voice-changer-worklet-processor", VoiceChangerWorkletProcessor);


@ -177,7 +177,7 @@ if __name__ == '__main__':
reload=True,
ssl_keyfile=key_path,
ssl_certfile=cert_path,
- log_level="warning"
+ # log_level="warning"
)
else:
# Start the HTTP server


@ -25,14 +25,20 @@ class MMVC_Namespace(socketio.AsyncNamespace):
srcId = int(msg[1])
dstId = int(msg[2])
timestamp = int(msg[3])
- prefixChunkSize = int(msg[4])
- data = msg[5]
- # print(srcId, dstId, timestamp)
+ convertChunkNum = int(msg[4])
+ crossFadeLowerValue = msg[5]
+ crossFadeOffsetRate = msg[6]
+ crossFadeEndRate = msg[7]
+ data = msg[8]
+ # print(srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate)
unpackedData = np.array(struct.unpack(
'<%sh' % (len(data) // struct.calcsize('<h')), data))
+ # audio1 = self.voiceChangerManager.changeVoice(
+ #     gpu, srcId, dstId, timestamp, prefixChunkSize, unpackedData)
audio1 = self.voiceChangerManager.changeVoice(
- gpu, srcId, dstId, timestamp, prefixChunkSize, unpackedData)
+ gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData)
+ print("sio result:", len(audio1), audio1.shape)
bin = struct.pack('<%sh' % len(audio1), *audio1)
await self.emit('response', [timestamp, bin])


@ -1,5 +1,5 @@
import torch
+ import math
from scipy.io.wavfile import write, read
import numpy as np
import traceback
@ -37,13 +37,131 @@ class VoiceChanger():
self.mps_enabled = getattr(
torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
- print(
-     f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
+ print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
+ self.crossFadeOffsetRate = 0
+ self.crossFadeEndRate = 0
+ self.unpackedData_length = 0

def destroy(self):
del self.net_g

- def on_request(self, gpu, srcId, dstId, timestamp, prefixChunkSize, wav):
+ def on_request(self, gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData):
# convertSize = unpackedData.shape[0] + (convertChunkNum * 128) # 128sample/1chunk
convertSize = convertChunkNum * 128 # 128sample/1chunk
# print("on_request", unpackedData.shape[0], convertChunkNum* 128 )
if unpackedData.shape[0] * 2 > convertSize:
# print(f"Convert sample_num = {128 * convertChunkNum} (128 * {convertChunkNum}) is less than input sample_num x2 ({unpackedData.shape[0]}) x2. Chage to {unpackedData.shape[0] * 2} samples")
convertSize = unpackedData.shape[0] * 2
if self.crossFadeOffsetRate != crossFadeOffsetRate or self.crossFadeEndRate != crossFadeEndRate or self.unpackedData_length != unpackedData.shape[0]:
self.crossFadeOffsetRate = crossFadeOffsetRate
self.crossFadeEndRate = crossFadeEndRate
self.unpackedData_length = unpackedData.shape[0]
cf_offset = int(unpackedData.shape[0] * crossFadeOffsetRate)
cf_end = int(unpackedData.shape[0] * crossFadeEndRate)
cf_range = cf_end - cf_offset
percent = np.arange(cf_range) / cf_range
np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
np_cur_strength = np.cos((1-percent) * 0.5 * np.pi) ** 2
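# cos^2(p*pi/2) + cos^2((1-p)*pi/2) = 1, so the two strengths sum to 1 at every sample inside the fade (constant-gain cross fade).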
np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(unpackedData.shape[0]-cf_offset-len(np_prev_strength))])
np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(unpackedData.shape[0]-cf_offset-len(np_cur_strength))])
self.prev_strength = torch.FloatTensor(np_prev_strength)
self.cur_strength = torch.FloatTensor(np_cur_strength)
torch.set_printoptions(edgeitems=2100)
print("Generated Strengths")
print(f"cross fade: start:{cf_offset} end:{cf_end} range:{cf_range}")
print(f"target_len:{unpackedData.shape[0]}, prev_len:{len(self.prev_strength)} cur_len:{len(self.cur_strength)}")
print("Prev", self.prev_strength)
print("Cur", self.cur_strength)
# The size changes from the previous result, so discard the recorded audio.
# (guard added: delattr on a missing attribute would raise AttributeError on the first request)
if hasattr(self, "prev_audio1"):
    delattr(self, "prev_audio1")
try:
# Shape this request's data into tensors
audio = torch.FloatTensor(unpackedData.astype(np.float32)) # create a float32 tensor
audio_norm = audio / self.hps.data.max_wav_value # normalize
audio_norm = audio_norm.unsqueeze(0) # unsqueeze
self.audio_buffer = torch.cat([self.audio_buffer, audio_norm], axis=1) # concatenate with past data
audio_norm = self.audio_buffer[:, -convertSize:] # extract only the portion to convert
self.audio_buffer = audio_norm
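# A rolling buffer keeps only the most recent convertSize samples, so each conversion sees enough past context for the cross fade.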
spec = spectrogram_torch(audio_norm, self.hps.data.filter_length,
self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
center=False)
spec = torch.squeeze(spec, 0)
sid = torch.LongTensor([int(srcId)])
data = (self.text_norm, spec, audio_norm, sid)
data = TextAudioSpeakerCollate()([data])
# if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled):
if gpu < 0 or self.gpu_num == 0:
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
x.cpu() for x in data]
sid_tgt1 = torch.LongTensor([dstId]).cpu()
audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
# elif self.mps_enabled == True: # MPS doesnt support aten::weight_norm_interface, and PYTORCH_ENABLE_MPS_FALLBACK=1 cause a big dely.
# x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
# x.to("mps") for x in data]
# sid_tgt1 = torch.LongTensor([dstId]).to("mps")
# audio1 = (self.net_g.to("mps").voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
# 0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
else:
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cuda(gpu) for x in data]
sid_tgt1 = torch.LongTensor([dstId]).cuda(gpu)
# audio1 = (self.net_g.cuda(gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
audio1 = self.net_g.cuda(gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value
if self.prev_strength.device != torch.device('cuda', gpu):
print(f"prev_strength move from {self.prev_strength.device} to gpu{gpu}")
self.prev_strength = self.prev_strength.cuda(gpu)
if self.cur_strength.device != torch.device('cuda', gpu):
print(f"cur_strength move from {self.cur_strength.device} to gpu{gpu}")
self.cur_strength = self.cur_strength.cuda(gpu)
if hasattr(self, 'prev_audio1') == True:
prev = self.prev_audio1[-1*unpackedData.shape[0]:]
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = prev * self.prev_strength + cur * self.cur_strength
# print("merging...", prev.shape, cur.shape)
else:
cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
result = cur
# print("no merging...", cur.shape)
self.prev_audio1 = audio1
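# The second-to-last input-sized window of the current conversion is blended with the last window of the previous one, so consecutive chunks cross fade into each other.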
#print(result)
result = result.cpu().float().numpy()
except Exception as e:
print("VC PROCESSING!!!! EXCEPTION!!!", e)
print(traceback.format_exc())
result = result.astype(np.int16)
print("on_request result size:",result.shape)
return result
def on_request_old(self, gpu, srcId, dstId, timestamp, prefixChunkSize, wav):
unpackedData = wav
convertSize = unpackedData.shape[0] + (prefixChunkSize * 512)
try:
@ -102,4 +220,4 @@ class VoiceChanger():
print(traceback.format_exc())
audio1 = audio1.astype(np.int16)
return audio1


@ -13,7 +13,14 @@ class VoiceChangerManager():
self.voiceChanger.destroy()
self.voiceChanger = VoiceChanger(config, model)

- def changeVoice(self, gpu, srcId, dstId, timestamp, prefixChunkSize, unpackedData):
+ def changeVoice(self, gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData):
+ if hasattr(self, 'voiceChanger') == True:
+ return self.voiceChanger.on_request(gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData)
+ else:
+ print("Voice Change is not loaded. Did you load a correct model?")
+ return np.zeros(1).astype(np.int16)
+
+ def changeVoice_old(self, gpu, srcId, dstId, timestamp, prefixChunkSize, unpackedData):
if hasattr(self, 'voiceChanger') == True:
return self.voiceChanger.on_request(gpu, srcId, dstId, timestamp, prefixChunkSize, unpackedData)
else: