This commit is contained in:
w-okada 2024-02-28 23:23:22 +09:00
parent bc6e8a9c08
commit 2ac5ec9feb
7 changed files with 424 additions and 426 deletions

11
client/.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,11 @@
{
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
},
"editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.printWidth": 1024,
"prettier.tabWidth": 4,
"files.associations": {
"*.css": "postcss"
}
}

View File

@ -1,8 +1,11 @@
{ {
"files.associations": {
"*.css": "postcss"
},
"workbench.colorCustomizations": { "workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc" "tab.activeBackground": "#65952acc"
},
"editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.printWidth": 1024,
"prettier.tabWidth": 4,
"files.associations": {
"*.css": "postcss"
} }
} }

View File

@ -1,4 +1,4 @@
import React from "react" import React from "react";
import { GuiStateProvider } from "./001_GuiStateProvider"; import { GuiStateProvider } from "./001_GuiStateProvider";
import { Dialogs } from "./900_Dialogs"; import { Dialogs } from "./900_Dialogs";
import { ModelSlotControl } from "./b00_ModelSlotControl"; import { ModelSlotControl } from "./b00_ModelSlotControl";
@ -13,5 +13,5 @@ export const Demo = () => {
<ModelSlotControl></ModelSlotControl> <ModelSlotControl></ModelSlotControl>
</div> </div>
</GuiStateProvider> </GuiStateProvider>
) );
} };

View File

@ -1,8 +1,11 @@
{ {
"workbench.colorCustomizations": { "workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc" "tab.activeBackground": "#65952acc"
}, },
"editor.defaultFormatter": "esbenp.prettier-vscode", "editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.printWidth": 1024, "prettier.printWidth": 1024,
"prettier.tabWidth": 4 "prettier.tabWidth": 4,
"files.associations": {
"*.css": "postcss"
}
} }

View File

@ -4,22 +4,21 @@
// 187.5chunk -> 1sec // 187.5chunk -> 1sec
export const VoiceChangerType = { export const VoiceChangerType = {
MMVCv15: "MMVCv15", MMVCv15: "MMVCv15",
MMVCv13: "MMVCv13", MMVCv13: "MMVCv13",
"so-vits-svc-40": "so-vits-svc-40", "so-vits-svc-40": "so-vits-svc-40",
"DDSP-SVC": "DDSP-SVC", "DDSP-SVC": "DDSP-SVC",
RVC: "RVC", RVC: "RVC",
"Diffusion-SVC": "Diffusion-SVC", "Diffusion-SVC": "Diffusion-SVC",
Beatrice: "Beatrice", Beatrice: "Beatrice",
LLVC: "LLVC", LLVC: "LLVC",
WebModel: "WebModel", WebModel: "WebModel",
EasyVC: "EasyVC", EasyVC: "EasyVC",
} as const; } as const;
export type VoiceChangerType = export type VoiceChangerType = (typeof VoiceChangerType)[keyof typeof VoiceChangerType];
(typeof VoiceChangerType)[keyof typeof VoiceChangerType];
export const StaticModel = { export const StaticModel = {
BeatriceJVS: "Beatrice-JVS", BeatriceJVS: "Beatrice-JVS",
} as const; } as const;
export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel]; export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel];
@ -27,439 +26,426 @@ export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel];
// サーバセッティング // サーバセッティング
/////////////////////// ///////////////////////
export const InputSampleRate = { export const InputSampleRate = {
"48000": 48000, "48000": 48000,
"44100": 44100, "44100": 44100,
"24000": 24000, "24000": 24000,
} as const; } as const;
export type InputSampleRate = export type InputSampleRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
(typeof InputSampleRate)[keyof typeof InputSampleRate];
export const ModelSamplingRate = { export const ModelSamplingRate = {
"48000": 48000, "48000": 48000,
"40000": 40000, "40000": 40000,
"32000": 32000, "32000": 32000,
} as const; } as const;
export type ModelSamplingRate = export type ModelSamplingRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
(typeof InputSampleRate)[keyof typeof InputSampleRate];
export const CrossFadeOverlapSize = { export const CrossFadeOverlapSize = {
"128": 128, "128": 128,
"256": 256, "256": 256,
"512": 512, "512": 512,
"1024": 1024, "1024": 1024,
"2048": 2048, "2048": 2048,
"4096": 4096, "4096": 4096,
} as const; } as const;
export type CrossFadeOverlapSize = export type CrossFadeOverlapSize = (typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
(typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
export const F0Detector = { export const F0Detector = {
dio: "dio", dio: "dio",
harvest: "harvest", harvest: "harvest",
crepe: "crepe", crepe: "crepe",
crepe_full: "crepe_full", crepe_full: "crepe_full",
crepe_tiny: "crepe_tiny", crepe_tiny: "crepe_tiny",
rmvpe: "rmvpe", rmvpe: "rmvpe",
rmvpe_onnx: "rmvpe_onnx", rmvpe_onnx: "rmvpe_onnx",
fcpe: "fcpe", fcpe: "fcpe",
} as const; } as const;
export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector]; export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector];
export const DiffMethod = { export const DiffMethod = {
pndm: "pndm", pndm: "pndm",
"dpm-solver": "dpm-solver", "dpm-solver": "dpm-solver",
} as const; } as const;
export type DiffMethod = (typeof DiffMethod)[keyof typeof DiffMethod]; export type DiffMethod = (typeof DiffMethod)[keyof typeof DiffMethod];
export const RVCModelType = { export const RVCModelType = {
pyTorchRVC: "pyTorchRVC", pyTorchRVC: "pyTorchRVC",
pyTorchRVCNono: "pyTorchRVCNono", pyTorchRVCNono: "pyTorchRVCNono",
pyTorchRVCv2: "pyTorchRVCv2", pyTorchRVCv2: "pyTorchRVCv2",
pyTorchRVCv2Nono: "pyTorchRVCv2Nono", pyTorchRVCv2Nono: "pyTorchRVCv2Nono",
pyTorchWebUI: "pyTorchWebUI", pyTorchWebUI: "pyTorchWebUI",
pyTorchWebUINono: "pyTorchWebUINono", pyTorchWebUINono: "pyTorchWebUINono",
onnxRVC: "onnxRVC", onnxRVC: "onnxRVC",
onnxRVCNono: "onnxRVCNono", onnxRVCNono: "onnxRVCNono",
} as const; } as const;
export type RVCModelType = (typeof RVCModelType)[keyof typeof RVCModelType]; export type RVCModelType = (typeof RVCModelType)[keyof typeof RVCModelType];
export const ServerSettingKey = { export const ServerSettingKey = {
passThrough: "passThrough", passThrough: "passThrough",
srcId: "srcId", srcId: "srcId",
dstId: "dstId", dstId: "dstId",
gpu: "gpu", gpu: "gpu",
crossFadeOffsetRate: "crossFadeOffsetRate", crossFadeOffsetRate: "crossFadeOffsetRate",
crossFadeEndRate: "crossFadeEndRate", crossFadeEndRate: "crossFadeEndRate",
crossFadeOverlapSize: "crossFadeOverlapSize", crossFadeOverlapSize: "crossFadeOverlapSize",
framework: "framework", framework: "framework",
onnxExecutionProvider: "onnxExecutionProvider", onnxExecutionProvider: "onnxExecutionProvider",
f0Factor: "f0Factor", f0Factor: "f0Factor",
f0Detector: "f0Detector", f0Detector: "f0Detector",
recordIO: "recordIO", recordIO: "recordIO",
enableServerAudio: "enableServerAudio", enableServerAudio: "enableServerAudio",
serverAudioStated: "serverAudioStated", serverAudioStated: "serverAudioStated",
serverAudioSampleRate: "serverAudioSampleRate", serverAudioSampleRate: "serverAudioSampleRate",
serverInputAudioSampleRate: "serverInputAudioSampleRate", serverInputAudioSampleRate: "serverInputAudioSampleRate",
serverOutputAudioSampleRate: "serverOutputAudioSampleRate", serverOutputAudioSampleRate: "serverOutputAudioSampleRate",
serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate", serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate",
serverInputAudioBufferSize: "serverInputAudioBufferSize", serverInputAudioBufferSize: "serverInputAudioBufferSize",
serverOutputAudioBufferSize: "serverOutputAudioBufferSize", serverOutputAudioBufferSize: "serverOutputAudioBufferSize",
serverInputDeviceId: "serverInputDeviceId", serverInputDeviceId: "serverInputDeviceId",
serverOutputDeviceId: "serverOutputDeviceId", serverOutputDeviceId: "serverOutputDeviceId",
serverMonitorDeviceId: "serverMonitorDeviceId", serverMonitorDeviceId: "serverMonitorDeviceId",
serverReadChunkSize: "serverReadChunkSize", serverReadChunkSize: "serverReadChunkSize",
serverInputAudioGain: "serverInputAudioGain", serverInputAudioGain: "serverInputAudioGain",
serverOutputAudioGain: "serverOutputAudioGain", serverOutputAudioGain: "serverOutputAudioGain",
serverMonitorAudioGain: "serverMonitorAudioGain", serverMonitorAudioGain: "serverMonitorAudioGain",
tran: "tran", tran: "tran",
noiseScale: "noiseScale", noiseScale: "noiseScale",
predictF0: "predictF0", predictF0: "predictF0",
silentThreshold: "silentThreshold", silentThreshold: "silentThreshold",
extraConvertSize: "extraConvertSize", extraConvertSize: "extraConvertSize",
clusterInferRatio: "clusterInferRatio", clusterInferRatio: "clusterInferRatio",
indexRatio: "indexRatio", indexRatio: "indexRatio",
protect: "protect", protect: "protect",
rvcQuality: "rvcQuality", rvcQuality: "rvcQuality",
modelSamplingRate: "modelSamplingRate", modelSamplingRate: "modelSamplingRate",
silenceFront: "silenceFront", silenceFront: "silenceFront",
modelSlotIndex: "modelSlotIndex", modelSlotIndex: "modelSlotIndex",
useEnhancer: "useEnhancer", useEnhancer: "useEnhancer",
useDiff: "useDiff", useDiff: "useDiff",
// "useDiffDpm": "useDiffDpm", // "useDiffDpm": "useDiffDpm",
diffMethod: "diffMethod", diffMethod: "diffMethod",
useDiffSilence: "useDiffSilence", useDiffSilence: "useDiffSilence",
diffAcc: "diffAcc", diffAcc: "diffAcc",
diffSpkId: "diffSpkId", diffSpkId: "diffSpkId",
kStep: "kStep", kStep: "kStep",
threshold: "threshold", threshold: "threshold",
speedUp: "speedUp", speedUp: "speedUp",
skipDiffusion: "skipDiffusion", skipDiffusion: "skipDiffusion",
inputSampleRate: "inputSampleRate", inputSampleRate: "inputSampleRate",
enableDirectML: "enableDirectML", enableDirectML: "enableDirectML",
} as const; } as const;
export type ServerSettingKey = export type ServerSettingKey = (typeof ServerSettingKey)[keyof typeof ServerSettingKey];
(typeof ServerSettingKey)[keyof typeof ServerSettingKey];
export type VoiceChangerServerSetting = { export type VoiceChangerServerSetting = {
passThrough: boolean; passThrough: boolean;
srcId: number; srcId: number;
dstId: number; dstId: number;
gpu: number; gpu: number;
crossFadeOffsetRate: number; crossFadeOffsetRate: number;
crossFadeEndRate: number; crossFadeEndRate: number;
crossFadeOverlapSize: CrossFadeOverlapSize; crossFadeOverlapSize: CrossFadeOverlapSize;
f0Factor: number; f0Factor: number;
f0Detector: F0Detector; // dio or harvest f0Detector: F0Detector; // dio or harvest
recordIO: number; // 0:off, 1:on recordIO: number; // 0:off, 1:on
enableServerAudio: number; // 0:off, 1:on enableServerAudio: number; // 0:off, 1:on
serverAudioStated: number; // 0:off, 1:on serverAudioStated: number; // 0:off, 1:on
serverAudioSampleRate: number; serverAudioSampleRate: number;
serverInputAudioSampleRate: number; serverInputAudioSampleRate: number;
serverOutputAudioSampleRate: number; serverOutputAudioSampleRate: number;
serverMonitorAudioSampleRate: number; serverMonitorAudioSampleRate: number;
serverInputAudioBufferSize: number; serverInputAudioBufferSize: number;
serverOutputAudioBufferSize: number; serverOutputAudioBufferSize: number;
serverInputDeviceId: number; serverInputDeviceId: number;
serverOutputDeviceId: number; serverOutputDeviceId: number;
serverMonitorDeviceId: number; serverMonitorDeviceId: number;
serverReadChunkSize: number; serverReadChunkSize: number;
serverInputAudioGain: number; serverInputAudioGain: number;
serverOutputAudioGain: number; serverOutputAudioGain: number;
serverMonitorAudioGain: number; serverMonitorAudioGain: number;
tran: number; // so-vits-svc tran: number; // so-vits-svc
noiseScale: number; // so-vits-svc noiseScale: number; // so-vits-svc
predictF0: number; // so-vits-svc predictF0: number; // so-vits-svc
silentThreshold: number; // so-vits-svc silentThreshold: number; // so-vits-svc
extraConvertSize: number; // so-vits-svc extraConvertSize: number; // so-vits-svc
clusterInferRatio: number; // so-vits-svc clusterInferRatio: number; // so-vits-svc
indexRatio: number; // RVC indexRatio: number; // RVC
protect: number; // RVC protect: number; // RVC
rvcQuality: number; // 0:low, 1:high rvcQuality: number; // 0:low, 1:high
silenceFront: number; // 0:off, 1:on silenceFront: number; // 0:off, 1:on
modelSamplingRate: ModelSamplingRate; // 32000,40000,48000 modelSamplingRate: ModelSamplingRate; // 32000,40000,48000
modelSlotIndex: number | StaticModel; modelSlotIndex: number | StaticModel;
useEnhancer: number; // DDSP-SVC useEnhancer: number; // DDSP-SVC
useDiff: number; // DDSP-SVC useDiff: number; // DDSP-SVC
// useDiffDpm: number// DDSP-SVC // useDiffDpm: number// DDSP-SVC
diffMethod: DiffMethod; // DDSP-SVC diffMethod: DiffMethod; // DDSP-SVC
useDiffSilence: number; // DDSP-SVC useDiffSilence: number; // DDSP-SVC
diffAcc: number; // DDSP-SVC diffAcc: number; // DDSP-SVC
diffSpkId: number; // DDSP-SVC diffSpkId: number; // DDSP-SVC
kStep: number; // DDSP-SVC kStep: number; // DDSP-SVC
threshold: number; // DDSP-SVC threshold: number; // DDSP-SVC
speedUp: number; // Diffusion-SVC speedUp: number; // Diffusion-SVC
skipDiffusion: number; // Diffusion-SVC 0:off, 1:on skipDiffusion: number; // Diffusion-SVC 0:off, 1:on
inputSampleRate: InputSampleRate; inputSampleRate: InputSampleRate;
enableDirectML: number; enableDirectML: number;
}; };
type ModelSlot = { type ModelSlot = {
slotIndex: number | StaticModel; slotIndex: number | StaticModel;
voiceChangerType: VoiceChangerType; voiceChangerType: VoiceChangerType;
name: string; name: string;
description: string; description: string;
credit: string; credit: string;
termsOfUseUrl: string; termsOfUseUrl: string;
iconFile: string; iconFile: string;
speakers: { [key: number]: string }; speakers: { [key: number]: string };
}; };
export type RVCModelSlot = ModelSlot & { export type RVCModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
indexFile: string; indexFile: string;
defaultIndexRatio: number; defaultIndexRatio: number;
defaultProtect: number; defaultProtect: number;
defaultTune: number; defaultTune: number;
modelType: RVCModelType; modelType: RVCModelType;
embChannels: number; embChannels: number;
f0: boolean; f0: boolean;
samplingRate: number; samplingRate: number;
deprecated: boolean; deprecated: boolean;
}; };
export type MMVCv13ModelSlot = ModelSlot & { export type MMVCv13ModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
configFile: string; configFile: string;
srcId: number; srcId: number;
dstId: number; dstId: number;
samplingRate: number; samplingRate: number;
speakers: { [key: number]: string }; speakers: { [key: number]: string };
}; };
export type MMVCv15ModelSlot = ModelSlot & { export type MMVCv15ModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
configFile: string; configFile: string;
srcId: number; srcId: number;
dstId: number; dstId: number;
f0Factor: number; f0Factor: number;
samplingRate: number; samplingRate: number;
f0: { [key: number]: number }; f0: { [key: number]: number };
}; };
export type SoVitsSvc40ModelSlot = ModelSlot & { export type SoVitsSvc40ModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
configFile: string; configFile: string;
clusterFile: string; clusterFile: string;
dstId: number; dstId: number;
samplingRate: number; samplingRate: number;
defaultTune: number; defaultTune: number;
defaultClusterInferRatio: number; defaultClusterInferRatio: number;
noiseScale: number; noiseScale: number;
speakers: { [key: number]: string }; speakers: { [key: number]: string };
}; };
export type DDSPSVCModelSlot = ModelSlot & { export type DDSPSVCModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
configFile: string; configFile: string;
diffModelFile: string; diffModelFile: string;
diffConfigFile: string; diffConfigFile: string;
dstId: number; dstId: number;
samplingRate: number; samplingRate: number;
defaultTune: number; defaultTune: number;
enhancer: boolean; enhancer: boolean;
diffusion: boolean; diffusion: boolean;
acc: number; acc: number;
kstep: number; kstep: number;
speakers: { [key: number]: string }; speakers: { [key: number]: string };
}; };
export type DiffusionSVCModelSlot = ModelSlot & { export type DiffusionSVCModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
dstId: number; dstId: number;
samplingRate: number; samplingRate: number;
defaultTune: number; defaultTune: number;
defaultKstep: number; defaultKstep: number;
defaultSpeedup: number; defaultSpeedup: number;
kStepMax: number; kStepMax: number;
nLayers: number; nLayers: number;
nnLayers: number; nnLayers: number;
speakers: { [key: number]: string }; speakers: { [key: number]: string };
}; };
export type BeatriceModelSlot = ModelSlot & { export type BeatriceModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
dstId: number; dstId: number;
speakers: { [key: number]: string }; speakers: { [key: number]: string };
}; };
export type LLVCModelSlot = ModelSlot & { export type LLVCModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
configFile: string; configFile: string;
speakers: { [key: number]: string }; speakers: { [key: number]: string };
}; };
export type WebModelSlot = ModelSlot & { export type WebModelSlot = ModelSlot & {
modelFile: string; modelFile: string;
defaultTune: number; defaultTune: number;
modelType: RVCModelType; modelType: RVCModelType;
f0: boolean; f0: boolean;
samplingRate: number; samplingRate: number;
}; };
export type ModelSlotUnion = export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot | DiffusionSVCModelSlot | BeatriceModelSlot | LLVCModelSlot | WebModelSlot;
| RVCModelSlot
| MMVCv13ModelSlot
| MMVCv15ModelSlot
| SoVitsSvc40ModelSlot
| DDSPSVCModelSlot
| DiffusionSVCModelSlot
| BeatriceModelSlot
| LLVCModelSlot
| WebModelSlot;
type ServerAudioDevice = { type ServerAudioDevice = {
kind: "audioinput" | "audiooutput"; kind: "audioinput" | "audiooutput";
index: number; index: number;
name: string; name: string;
hostAPI: string; hostAPI: string;
}; };
export type ServerInfo = VoiceChangerServerSetting & { export type ServerInfo = VoiceChangerServerSetting & {
// コンフィグ対象外 (getInfoで取得のみ可能な情報) // コンフィグ対象外 (getInfoで取得のみ可能な情報)
status: string; status: string;
modelSlots: ModelSlotUnion[]; modelSlots: ModelSlotUnion[];
serverAudioInputDevices: ServerAudioDevice[]; serverAudioInputDevices: ServerAudioDevice[];
serverAudioOutputDevices: ServerAudioDevice[]; serverAudioOutputDevices: ServerAudioDevice[];
sampleModels: (RVCSampleModel | DiffusionSVCSampleModel)[]; sampleModels: (RVCSampleModel | DiffusionSVCSampleModel)[];
gpus: { gpus: {
id: number; id: number;
name: string; name: string;
memory: number; memory: number;
}[]; }[];
maxInputLength: number; // MMVCv15 maxInputLength: number; // MMVCv15
voiceChangerParams: { voiceChangerParams: {
model_dir: string; model_dir: string;
}; };
}; };
export type SampleModel = { export type SampleModel = {
id: string; id: string;
voiceChangerType: VoiceChangerType; voiceChangerType: VoiceChangerType;
lang: string; lang: string;
tag: string[]; tag: string[];
name: string; name: string;
modelUrl: string; modelUrl: string;
termsOfUseUrl: string; termsOfUseUrl: string;
icon: string; icon: string;
credit: string; credit: string;
description: string; description: string;
sampleRate: number; sampleRate: number;
modelType: string; modelType: string;
f0: boolean; f0: boolean;
}; };
export type RVCSampleModel = SampleModel & { export type RVCSampleModel = SampleModel & {
indexUrl: string; indexUrl: string;
featureUrl: string; featureUrl: string;
}; };
export type DiffusionSVCSampleModel = SampleModel & { export type DiffusionSVCSampleModel = SampleModel & {
numOfDiffLayers: number; numOfDiffLayers: number;
numOfNativeLayers: number; numOfNativeLayers: number;
maxKStep: number; maxKStep: number;
}; };
export const DefaultServerSetting: ServerInfo = { export const DefaultServerSetting: ServerInfo = {
// VC Common // VC Common
passThrough: false, passThrough: false,
inputSampleRate: 48000, inputSampleRate: 48000,
crossFadeOffsetRate: 0.0, crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0, crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024], crossFadeOverlapSize: CrossFadeOverlapSize[1024],
recordIO: 0, recordIO: 0,
enableServerAudio: 0, enableServerAudio: 0,
serverAudioStated: 0, serverAudioStated: 0,
serverAudioSampleRate: 48000, serverAudioSampleRate: 48000,
serverInputAudioSampleRate: 48000, serverInputAudioSampleRate: 48000,
serverOutputAudioSampleRate: 48000, serverOutputAudioSampleRate: 48000,
serverMonitorAudioSampleRate: 48000, serverMonitorAudioSampleRate: 48000,
serverInputAudioBufferSize: 1024 * 24, serverInputAudioBufferSize: 1024 * 24,
serverOutputAudioBufferSize: 1024 * 24, serverOutputAudioBufferSize: 1024 * 24,
serverInputDeviceId: -1, serverInputDeviceId: -1,
serverOutputDeviceId: -1, serverOutputDeviceId: -1,
serverMonitorDeviceId: -1, serverMonitorDeviceId: -1,
serverReadChunkSize: 256, serverReadChunkSize: 256,
serverInputAudioGain: 1.0, serverInputAudioGain: 1.0,
serverOutputAudioGain: 1.0, serverOutputAudioGain: 1.0,
serverMonitorAudioGain: 1.0, serverMonitorAudioGain: 1.0,
// VC Specific // VC Specific
srcId: 0, srcId: 0,
dstId: 1, dstId: 1,
gpu: 0, gpu: 0,
f0Factor: 1.0, f0Factor: 1.0,
f0Detector: F0Detector.rmvpe_onnx, f0Detector: F0Detector.rmvpe_onnx,
tran: 0, tran: 0,
noiseScale: 0, noiseScale: 0,
predictF0: 0, predictF0: 0,
silentThreshold: 0, silentThreshold: 0,
extraConvertSize: 0, extraConvertSize: 0,
clusterInferRatio: 0, clusterInferRatio: 0,
indexRatio: 0, indexRatio: 0,
protect: 0.5, protect: 0.5,
rvcQuality: 0, rvcQuality: 0,
modelSamplingRate: 48000, modelSamplingRate: 48000,
silenceFront: 1, silenceFront: 1,
modelSlotIndex: 0, modelSlotIndex: 0,
sampleModels: [], sampleModels: [],
gpus: [], gpus: [],
useEnhancer: 0, useEnhancer: 0,
useDiff: 1, useDiff: 1,
diffMethod: "dpm-solver", diffMethod: "dpm-solver",
useDiffSilence: 0, useDiffSilence: 0,
diffAcc: 20, diffAcc: 20,
diffSpkId: 1, diffSpkId: 1,
kStep: 120, kStep: 120,
threshold: -45, threshold: -45,
speedUp: 10, speedUp: 10,
skipDiffusion: 1, skipDiffusion: 1,
enableDirectML: 0, enableDirectML: 0,
// //
status: "ok", status: "ok",
modelSlots: [], modelSlots: [],
serverAudioInputDevices: [], serverAudioInputDevices: [],
serverAudioOutputDevices: [], serverAudioOutputDevices: [],
maxInputLength: 128 * 2048, maxInputLength: 128 * 2048,
voiceChangerParams: { voiceChangerParams: {
model_dir: "", model_dir: "",
}, },
}; };
/////////////////////// ///////////////////////
@ -467,113 +453,109 @@ export const DefaultServerSetting: ServerInfo = {
/////////////////////// ///////////////////////
export type WorkletSetting = { export type WorkletSetting = {
numTrancateTreshold: number; numTrancateTreshold: number;
volTrancateThreshold: number; volTrancateThreshold: number;
volTrancateLength: number; volTrancateLength: number;
}; };
/////////////////////// ///////////////////////
// Worklet Nodeセッティング // Worklet Nodeセッティング
/////////////////////// ///////////////////////
export const Protocol = { export const Protocol = {
sio: "sio", sio: "sio",
rest: "rest", rest: "rest",
internal: "internal", internal: "internal",
} as const; } as const;
export type Protocol = (typeof Protocol)[keyof typeof Protocol]; export type Protocol = (typeof Protocol)[keyof typeof Protocol];
export const SendingSampleRate = { export const SendingSampleRate = {
"48000": 48000, "48000": 48000,
"44100": 44100, "44100": 44100,
"24000": 24000, "24000": 24000,
} as const; } as const;
export type SendingSampleRate = export type SendingSampleRate = (typeof SendingSampleRate)[keyof typeof SendingSampleRate];
(typeof SendingSampleRate)[keyof typeof SendingSampleRate];
export const DownSamplingMode = { export const DownSamplingMode = {
decimate: "decimate", decimate: "decimate",
average: "average", average: "average",
} as const; } as const;
export type DownSamplingMode = export type DownSamplingMode = (typeof DownSamplingMode)[keyof typeof DownSamplingMode];
(typeof DownSamplingMode)[keyof typeof DownSamplingMode];
export type WorkletNodeSetting = { export type WorkletNodeSetting = {
serverUrl: string; serverUrl: string;
protocol: Protocol; protocol: Protocol;
sendingSampleRate: SendingSampleRate; sendingSampleRate: SendingSampleRate;
inputChunkNum: number; inputChunkNum: number;
downSamplingMode: DownSamplingMode; downSamplingMode: DownSamplingMode;
}; };
/////////////////////// ///////////////////////
// クライアントセッティング // クライアントセッティング
/////////////////////// ///////////////////////
export const SampleRate = { export const SampleRate = {
"48000": 48000, "48000": 48000,
} as const; } as const;
export type SampleRate = (typeof SampleRate)[keyof typeof SampleRate]; export type SampleRate = (typeof SampleRate)[keyof typeof SampleRate];
export type VoiceChangerClientSetting = { export type VoiceChangerClientSetting = {
audioInput: string | MediaStream | null; audioInput: string | MediaStream | null;
sampleRate: SampleRate; // 48000Hz sampleRate: SampleRate; // 48000Hz
echoCancel: boolean; echoCancel: boolean;
noiseSuppression: boolean; noiseSuppression: boolean;
noiseSuppression2: boolean; noiseSuppression2: boolean;
inputGain: number; inputGain: number;
outputGain: number; outputGain: number;
monitorGain: number; monitorGain: number;
passThroughConfirmationSkip: boolean; passThroughConfirmationSkip: boolean;
}; };
/////////////////////// ///////////////////////
// Client セッティング // Client セッティング
/////////////////////// ///////////////////////
export type ClientSetting = { export type ClientSetting = {
workletSetting: WorkletSetting; workletSetting: WorkletSetting;
workletNodeSetting: WorkletNodeSetting; workletNodeSetting: WorkletNodeSetting;
voiceChangerClientSetting: VoiceChangerClientSetting; voiceChangerClientSetting: VoiceChangerClientSetting;
}; };
export const DefaultClientSettng: ClientSetting = { export const DefaultClientSettng: ClientSetting = {
workletSetting: { workletSetting: {
// numTrancateTreshold: 512 * 2, // numTrancateTreshold: 512 * 2,
numTrancateTreshold: 100, numTrancateTreshold: 100,
volTrancateThreshold: 0.0005, volTrancateThreshold: 0.0005,
volTrancateLength: 32, volTrancateLength: 32,
}, },
workletNodeSetting: { workletNodeSetting: {
serverUrl: "", serverUrl: "",
protocol: "sio", protocol: "sio",
sendingSampleRate: 48000, sendingSampleRate: 48000,
inputChunkNum: 192, inputChunkNum: 192,
downSamplingMode: "average", downSamplingMode: "average",
}, },
voiceChangerClientSetting: { voiceChangerClientSetting: {
audioInput: null, audioInput: null,
sampleRate: 48000, sampleRate: 48000,
echoCancel: false, echoCancel: false,
noiseSuppression: false, noiseSuppression: false,
noiseSuppression2: false, noiseSuppression2: false,
inputGain: 1.0, inputGain: 1.0,
outputGain: 1.0, outputGain: 1.0,
monitorGain: 1.0, monitorGain: 1.0,
passThroughConfirmationSkip: false, passThroughConfirmationSkip: false,
}, },
}; };
//////////////////////////////////// ////////////////////////////////////
// Exceptions // Exceptions
//////////////////////////////////// ////////////////////////////////////
export const VOICE_CHANGER_CLIENT_EXCEPTION = { export const VOICE_CHANGER_CLIENT_EXCEPTION = {
ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED", ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED",
ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE", ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE", ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE",
ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED", ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED",
ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED: ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED: "ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED",
"ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED",
} as const; } as const;
export type VOICE_CHANGER_CLIENT_EXCEPTION = export type VOICE_CHANGER_CLIENT_EXCEPTION = (typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
(typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
//////////////////////////////////// ////////////////////////////////////
// indexedDB // indexedDB
@ -582,23 +564,22 @@ export const INDEXEDDB_DB_APP_NAME = "INDEXEDDB_KEY_VOICE_CHANGER";
export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB"; export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB";
export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT"; export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT";
export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER"; export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER";
export const INDEXEDDB_KEY_MODEL_DATA = export const INDEXEDDB_KEY_MODEL_DATA = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA";
"INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA";
// ONNX // ONNX
export type OnnxExporterInfo = { export type OnnxExporterInfo = {
status: string; status: string;
path: string; path: string;
filename: string; filename: string;
}; };
// Merge // Merge
export type MergeElement = { export type MergeElement = {
slotIndex: number; slotIndex: number;
strength: number; strength: number;
}; };
export type MergeModelRequest = { export type MergeModelRequest = {
voiceChangerType: VoiceChangerType; voiceChangerType: VoiceChangerType;
command: "mix"; command: "mix";
files: MergeElement[]; files: MergeElement[];
}; };

View File

@ -184,8 +184,8 @@ class EasyVC(VoiceChangerModel):
logger.info("[Voice Changer] Pipeline is not initialized.") logger.info("[Voice Changer] Pipeline is not initialized.")
raise PipelineNotInitializedException() raise PipelineNotInitializedException()
enableTimer = True enableTimer = False
with Timer2("infer_easyvc", enableTimer) as t: with Timer2("infer-easyvc", enableTimer) as t:
# 処理は16Kで実施(Pitch, embed, (infer)) # 処理は16Kで実施(Pitch, embed, (infer))
receivedData = cast( receivedData = cast(
@ -203,7 +203,7 @@ class EasyVC(VoiceChangerModel):
# 入力データ生成 # 入力データ生成
data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame) data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame)
t.record("generate_input") t.record("generate-input")
audio = data[0] audio = data[0]
pitchf = data[1] pitchf = data[1]

View File

@ -138,7 +138,7 @@ class Pipeline(object):
# print(f"pipeline exec input, audio:{audio.shape}, pitchf:{pitchf.shape}, feature:{feature.shape}") # print(f"pipeline exec input, audio:{audio.shape}, pitchf:{pitchf.shape}, feature:{feature.shape}")
# print(f"pipeline exec input, silence_front:{silence_front}, out_size:{out_size}") # print(f"pipeline exec input, silence_front:{silence_front}, out_size:{out_size}")
enablePipelineTimer = True enablePipelineTimer = False
with Timer2("Pipeline-Exec", enablePipelineTimer) as t: # NOQA with Timer2("Pipeline-Exec", enablePipelineTimer) as t: # NOQA
# 16000のサンプリングレートで入ってきている。以降この世界は16000で処理。 # 16000のサンプリングレートで入ってきている。以降この世界は16000で処理。
# self.t_pad = self.sr * repeat # 1秒 # self.t_pad = self.sr * repeat # 1秒