diff --git a/client/.vscode/settings.json b/client/.vscode/settings.json new file mode 100644 index 00000000..90365164 --- /dev/null +++ b/client/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "workbench.colorCustomizations": { + "tab.activeBackground": "#65952acc" + }, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "prettier.printWidth": 1024, + "prettier.tabWidth": 4, + "files.associations": { + "*.css": "postcss" + } +} diff --git a/client/demo/.vscode/settings.json b/client/demo/.vscode/settings.json index 78cfadb7..90365164 100644 --- a/client/demo/.vscode/settings.json +++ b/client/demo/.vscode/settings.json @@ -1,8 +1,11 @@ { - "files.associations": { - "*.css": "postcss" - }, "workbench.colorCustomizations": { "tab.activeBackground": "#65952acc" + }, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "prettier.printWidth": 1024, + "prettier.tabWidth": 4, + "files.associations": { + "*.css": "postcss" } } diff --git a/client/demo/src/components/demo/010_Demo.tsx b/client/demo/src/components/demo/010_Demo.tsx index 4725950f..71c8a9af 100644 --- a/client/demo/src/components/demo/010_Demo.tsx +++ b/client/demo/src/components/demo/010_Demo.tsx @@ -1,4 +1,4 @@ -import React from "react" +import React from "react"; import { GuiStateProvider } from "./001_GuiStateProvider"; import { Dialogs } from "./900_Dialogs"; import { ModelSlotControl } from "./b00_ModelSlotControl"; @@ -13,5 +13,5 @@ export const Demo = () => { - ) -} \ No newline at end of file + ); +}; diff --git a/client/lib/.vscode/settings.json b/client/lib/.vscode/settings.json index a1af19b9..6a7ce45e 100644 --- a/client/lib/.vscode/settings.json +++ b/client/lib/.vscode/settings.json @@ -1,8 +1,11 @@ { - "workbench.colorCustomizations": { - "tab.activeBackground": "#65952acc" - }, - "editor.defaultFormatter": "esbenp.prettier-vscode", - "prettier.printWidth": 1024, - "prettier.tabWidth": 4 + "workbench.colorCustomizations": { + "tab.activeBackground": "#65952acc" + }, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "prettier.printWidth": 1024, + "prettier.tabWidth": 4, + "files.associations": { + "*.css": "postcss" + } } diff --git a/client/lib/src/const.ts b/client/lib/src/const.ts index 1ea6bfa8..6b60164b 100644 --- a/client/lib/src/const.ts +++ b/client/lib/src/const.ts @@ -4,22 +4,21 @@ // 187.5chunk -> 1sec export const VoiceChangerType = { - MMVCv15: "MMVCv15", - MMVCv13: "MMVCv13", - "so-vits-svc-40": "so-vits-svc-40", - "DDSP-SVC": "DDSP-SVC", - RVC: "RVC", - "Diffusion-SVC": "Diffusion-SVC", - Beatrice: "Beatrice", - LLVC: "LLVC", - WebModel: "WebModel", - EasyVC: "EasyVC", + MMVCv15: "MMVCv15", + MMVCv13: "MMVCv13", + "so-vits-svc-40": "so-vits-svc-40", + "DDSP-SVC": "DDSP-SVC", + RVC: "RVC", + "Diffusion-SVC": "Diffusion-SVC", + Beatrice: "Beatrice", + LLVC: "LLVC", + WebModel: "WebModel", + EasyVC: "EasyVC", } as const; -export type VoiceChangerType = - (typeof VoiceChangerType)[keyof typeof VoiceChangerType]; +export type VoiceChangerType = (typeof VoiceChangerType)[keyof typeof VoiceChangerType]; export const StaticModel = { - BeatriceJVS: "Beatrice-JVS", + BeatriceJVS: "Beatrice-JVS", } as const; export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel]; @@ -27,439 +26,426 @@ export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel]; // サーバセッティング /////////////////////// export const InputSampleRate = { - "48000": 48000, - "44100": 44100, - "24000": 24000, + "48000": 48000, + "44100": 44100, + "24000": 24000, } as const; -export type InputSampleRate = - (typeof InputSampleRate)[keyof typeof InputSampleRate]; +export type InputSampleRate = (typeof InputSampleRate)[keyof typeof InputSampleRate]; export const ModelSamplingRate = { - "48000": 48000, - "40000": 40000, - "32000": 32000, + "48000": 48000, + "40000": 40000, + "32000": 32000, } as const; -export type ModelSamplingRate = - (typeof InputSampleRate)[keyof typeof InputSampleRate]; +export type ModelSamplingRate = (typeof InputSampleRate)[keyof typeof InputSampleRate]; export const CrossFadeOverlapSize = { - "128": 128, - "256": 256, - "512": 512, - "1024": 1024, - "2048": 2048, - "4096": 4096, + "128": 128, + "256": 256, + "512": 512, + "1024": 1024, + "2048": 2048, + "4096": 4096, } as const; -export type CrossFadeOverlapSize = - (typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize]; +export type CrossFadeOverlapSize = (typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize]; export const F0Detector = { - dio: "dio", - harvest: "harvest", - crepe: "crepe", - crepe_full: "crepe_full", - crepe_tiny: "crepe_tiny", - rmvpe: "rmvpe", - rmvpe_onnx: "rmvpe_onnx", - fcpe: "fcpe", + dio: "dio", + harvest: "harvest", + crepe: "crepe", + crepe_full: "crepe_full", + crepe_tiny: "crepe_tiny", + rmvpe: "rmvpe", + rmvpe_onnx: "rmvpe_onnx", + fcpe: "fcpe", } as const; export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector]; export const DiffMethod = { - pndm: "pndm", - "dpm-solver": "dpm-solver", + pndm: "pndm", + "dpm-solver": "dpm-solver", } as const; export type DiffMethod = (typeof DiffMethod)[keyof typeof DiffMethod]; export const RVCModelType = { - pyTorchRVC: "pyTorchRVC", - pyTorchRVCNono: "pyTorchRVCNono", - pyTorchRVCv2: "pyTorchRVCv2", - pyTorchRVCv2Nono: "pyTorchRVCv2Nono", - pyTorchWebUI: "pyTorchWebUI", - pyTorchWebUINono: "pyTorchWebUINono", - onnxRVC: "onnxRVC", - onnxRVCNono: "onnxRVCNono", + pyTorchRVC: "pyTorchRVC", + pyTorchRVCNono: "pyTorchRVCNono", + pyTorchRVCv2: "pyTorchRVCv2", + pyTorchRVCv2Nono: "pyTorchRVCv2Nono", + pyTorchWebUI: "pyTorchWebUI", + pyTorchWebUINono: "pyTorchWebUINono", + onnxRVC: "onnxRVC", + onnxRVCNono: "onnxRVCNono", } as const; export type RVCModelType = (typeof RVCModelType)[keyof typeof RVCModelType]; export const ServerSettingKey = { - passThrough: "passThrough", - srcId: "srcId", - dstId: "dstId", - gpu: "gpu", + passThrough: "passThrough", + srcId: "srcId", + dstId: "dstId", + gpu: "gpu", - crossFadeOffsetRate: "crossFadeOffsetRate", - crossFadeEndRate: "crossFadeEndRate", - crossFadeOverlapSize: "crossFadeOverlapSize", + crossFadeOffsetRate: "crossFadeOffsetRate", + crossFadeEndRate: "crossFadeEndRate", + crossFadeOverlapSize: "crossFadeOverlapSize", - framework: "framework", - onnxExecutionProvider: "onnxExecutionProvider", + framework: "framework", + onnxExecutionProvider: "onnxExecutionProvider", - f0Factor: "f0Factor", - f0Detector: "f0Detector", - recordIO: "recordIO", + f0Factor: "f0Factor", + f0Detector: "f0Detector", + recordIO: "recordIO", - enableServerAudio: "enableServerAudio", - serverAudioStated: "serverAudioStated", - serverAudioSampleRate: "serverAudioSampleRate", - serverInputAudioSampleRate: "serverInputAudioSampleRate", - serverOutputAudioSampleRate: "serverOutputAudioSampleRate", - serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate", - serverInputAudioBufferSize: "serverInputAudioBufferSize", - serverOutputAudioBufferSize: "serverOutputAudioBufferSize", - serverInputDeviceId: "serverInputDeviceId", - serverOutputDeviceId: "serverOutputDeviceId", - serverMonitorDeviceId: "serverMonitorDeviceId", - serverReadChunkSize: "serverReadChunkSize", - serverInputAudioGain: "serverInputAudioGain", - serverOutputAudioGain: "serverOutputAudioGain", - serverMonitorAudioGain: "serverMonitorAudioGain", + enableServerAudio: "enableServerAudio", + serverAudioStated: "serverAudioStated", + serverAudioSampleRate: "serverAudioSampleRate", + serverInputAudioSampleRate: "serverInputAudioSampleRate", + serverOutputAudioSampleRate: "serverOutputAudioSampleRate", + serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate", + serverInputAudioBufferSize: "serverInputAudioBufferSize", + serverOutputAudioBufferSize: "serverOutputAudioBufferSize", + serverInputDeviceId: "serverInputDeviceId", + serverOutputDeviceId: "serverOutputDeviceId", + serverMonitorDeviceId: "serverMonitorDeviceId", + serverReadChunkSize: "serverReadChunkSize", + serverInputAudioGain: "serverInputAudioGain", + serverOutputAudioGain: "serverOutputAudioGain", + serverMonitorAudioGain: "serverMonitorAudioGain", - tran: "tran", - noiseScale: "noiseScale", - predictF0: "predictF0", - silentThreshold: "silentThreshold", - extraConvertSize: "extraConvertSize", - clusterInferRatio: "clusterInferRatio", + tran: "tran", + noiseScale: "noiseScale", + predictF0: "predictF0", + silentThreshold: "silentThreshold", + extraConvertSize: "extraConvertSize", + clusterInferRatio: "clusterInferRatio", - indexRatio: "indexRatio", - protect: "protect", - rvcQuality: "rvcQuality", - modelSamplingRate: "modelSamplingRate", - silenceFront: "silenceFront", - modelSlotIndex: "modelSlotIndex", + indexRatio: "indexRatio", + protect: "protect", + rvcQuality: "rvcQuality", + modelSamplingRate: "modelSamplingRate", + silenceFront: "silenceFront", + modelSlotIndex: "modelSlotIndex", - useEnhancer: "useEnhancer", - useDiff: "useDiff", - // "useDiffDpm": "useDiffDpm", - diffMethod: "diffMethod", - useDiffSilence: "useDiffSilence", - diffAcc: "diffAcc", - diffSpkId: "diffSpkId", - kStep: "kStep", - threshold: "threshold", + useEnhancer: "useEnhancer", + useDiff: "useDiff", + // "useDiffDpm": "useDiffDpm", + diffMethod: "diffMethod", + useDiffSilence: "useDiffSilence", + diffAcc: "diffAcc", + diffSpkId: "diffSpkId", + kStep: "kStep", + threshold: "threshold", - speedUp: "speedUp", - skipDiffusion: "skipDiffusion", + speedUp: "speedUp", + skipDiffusion: "skipDiffusion", - inputSampleRate: "inputSampleRate", - enableDirectML: "enableDirectML", + inputSampleRate: "inputSampleRate", + enableDirectML: "enableDirectML", } as const; -export type ServerSettingKey = - (typeof ServerSettingKey)[keyof typeof ServerSettingKey]; +export type ServerSettingKey = (typeof ServerSettingKey)[keyof typeof ServerSettingKey]; export type VoiceChangerServerSetting = { - passThrough: boolean; - srcId: number; - dstId: number; - gpu: number; + passThrough: boolean; + srcId: number; + dstId: number; + gpu: number; - crossFadeOffsetRate: number; - crossFadeEndRate: number; - crossFadeOverlapSize: CrossFadeOverlapSize; + crossFadeOffsetRate: number; + crossFadeEndRate: number; + crossFadeOverlapSize: CrossFadeOverlapSize; - f0Factor: number; - f0Detector: F0Detector; // dio or harvest - recordIO: number; // 0:off, 1:on + f0Factor: number; + f0Detector: F0Detector; // dio or harvest + recordIO: number; // 0:off, 1:on - enableServerAudio: number; // 0:off, 1:on - serverAudioStated: number; // 0:off, 1:on - serverAudioSampleRate: number; - serverInputAudioSampleRate: number; - serverOutputAudioSampleRate: number; - serverMonitorAudioSampleRate: number; - serverInputAudioBufferSize: number; - serverOutputAudioBufferSize: number; - serverInputDeviceId: number; - serverOutputDeviceId: number; - serverMonitorDeviceId: number; - serverReadChunkSize: number; - serverInputAudioGain: number; - serverOutputAudioGain: number; - serverMonitorAudioGain: number; + enableServerAudio: number; // 0:off, 1:on + serverAudioStated: number; // 0:off, 1:on + serverAudioSampleRate: number; + serverInputAudioSampleRate: number; + serverOutputAudioSampleRate: number; + serverMonitorAudioSampleRate: number; + serverInputAudioBufferSize: number; + serverOutputAudioBufferSize: number; + serverInputDeviceId: number; + serverOutputDeviceId: number; + serverMonitorDeviceId: number; + serverReadChunkSize: number; + serverInputAudioGain: number; + serverOutputAudioGain: number; + serverMonitorAudioGain: number; - tran: number; // so-vits-svc - noiseScale: number; // so-vits-svc - predictF0: number; // so-vits-svc - silentThreshold: number; // so-vits-svc - extraConvertSize: number; // so-vits-svc - clusterInferRatio: number; // so-vits-svc + tran: number; // so-vits-svc + noiseScale: number; // so-vits-svc + predictF0: number; // so-vits-svc + silentThreshold: number; // so-vits-svc + extraConvertSize: number; // so-vits-svc + clusterInferRatio: number; // so-vits-svc - indexRatio: number; // RVC - protect: number; // RVC - rvcQuality: number; // 0:low, 1:high - silenceFront: number; // 0:off, 1:on - modelSamplingRate: ModelSamplingRate; // 32000,40000,48000 - modelSlotIndex: number | StaticModel; + indexRatio: number; // RVC + protect: number; // RVC + rvcQuality: number; // 0:low, 1:high + silenceFront: number; // 0:off, 1:on + modelSamplingRate: ModelSamplingRate; // 32000,40000,48000 + modelSlotIndex: number | StaticModel; - useEnhancer: number; // DDSP-SVC - useDiff: number; // DDSP-SVC - // useDiffDpm: number// DDSP-SVC - diffMethod: DiffMethod; // DDSP-SVC - useDiffSilence: number; // DDSP-SVC - diffAcc: number; // DDSP-SVC - diffSpkId: number; // DDSP-SVC - kStep: number; // DDSP-SVC - threshold: number; // DDSP-SVC + useEnhancer: number; // DDSP-SVC + useDiff: number; // DDSP-SVC + // useDiffDpm: number// DDSP-SVC + diffMethod: DiffMethod; // DDSP-SVC + useDiffSilence: number; // DDSP-SVC + diffAcc: number; // DDSP-SVC + diffSpkId: number; // DDSP-SVC + kStep: number; // DDSP-SVC + threshold: number; // DDSP-SVC - speedUp: number; // Diffusion-SVC - skipDiffusion: number; // Diffusion-SVC 0:off, 1:on + speedUp: number; // Diffusion-SVC + skipDiffusion: number; // Diffusion-SVC 0:off, 1:on - inputSampleRate: InputSampleRate; - enableDirectML: number; + inputSampleRate: InputSampleRate; + enableDirectML: number; }; type ModelSlot = { - slotIndex: number | StaticModel; - voiceChangerType: VoiceChangerType; - name: string; - description: string; - credit: string; - termsOfUseUrl: string; - iconFile: string; - speakers: { [key: number]: string }; + slotIndex: number | StaticModel; + voiceChangerType: VoiceChangerType; + name: string; + description: string; + credit: string; + termsOfUseUrl: string; + iconFile: string; + speakers: { [key: number]: string }; }; export type RVCModelSlot = ModelSlot & { - modelFile: string; - indexFile: string; - defaultIndexRatio: number; - defaultProtect: number; - defaultTune: number; - modelType: RVCModelType; + modelFile: string; + indexFile: string; + defaultIndexRatio: number; + defaultProtect: number; + defaultTune: number; + modelType: RVCModelType; - embChannels: number; - f0: boolean; - samplingRate: number; - deprecated: boolean; + embChannels: number; + f0: boolean; + samplingRate: number; + deprecated: boolean; }; export type MMVCv13ModelSlot = ModelSlot & { - modelFile: string; - configFile: string; - srcId: number; - dstId: number; + modelFile: string; + configFile: string; + srcId: number; + dstId: number; - samplingRate: number; - speakers: { [key: number]: string }; + samplingRate: number; + speakers: { [key: number]: string }; }; export type MMVCv15ModelSlot = ModelSlot & { - modelFile: string; - configFile: string; - srcId: number; - dstId: number; - f0Factor: number; - samplingRate: number; - f0: { [key: number]: number }; + modelFile: string; + configFile: string; + srcId: number; + dstId: number; + f0Factor: number; + samplingRate: number; + f0: { [key: number]: number }; }; export type SoVitsSvc40ModelSlot = ModelSlot & { - modelFile: string; - configFile: string; - clusterFile: string; - dstId: number; + modelFile: string; + configFile: string; + clusterFile: string; + dstId: number; - samplingRate: number; + samplingRate: number; - defaultTune: number; - defaultClusterInferRatio: number; - noiseScale: number; - speakers: { [key: number]: string }; + defaultTune: number; + defaultClusterInferRatio: number; + noiseScale: number; + speakers: { [key: number]: string }; }; export type DDSPSVCModelSlot = ModelSlot & { - modelFile: string; - configFile: string; - diffModelFile: string; - diffConfigFile: string; - dstId: number; + modelFile: string; + configFile: string; + diffModelFile: string; + diffConfigFile: string; + dstId: number; - samplingRate: number; + samplingRate: number; - defaultTune: number; - enhancer: boolean; - diffusion: boolean; - acc: number; - kstep: number; - speakers: { [key: number]: string }; + defaultTune: number; + enhancer: boolean; + diffusion: boolean; + acc: number; + kstep: number; + speakers: { [key: number]: string }; }; export type DiffusionSVCModelSlot = ModelSlot & { - modelFile: string; - dstId: number; + modelFile: string; + dstId: number; - samplingRate: number; + samplingRate: number; - defaultTune: number; - defaultKstep: number; - defaultSpeedup: number; - kStepMax: number; - nLayers: number; - nnLayers: number; - speakers: { [key: number]: string }; + defaultTune: number; + defaultKstep: number; + defaultSpeedup: number; + kStepMax: number; + nLayers: number; + nnLayers: number; + speakers: { [key: number]: string }; }; export type BeatriceModelSlot = ModelSlot & { - modelFile: string; - dstId: number; + modelFile: string; + dstId: number; - speakers: { [key: number]: string }; + speakers: { [key: number]: string }; }; export type LLVCModelSlot = ModelSlot & { - modelFile: string; - configFile: string; + modelFile: string; + configFile: string; - speakers: { [key: number]: string }; + speakers: { [key: number]: string }; }; export type WebModelSlot = ModelSlot & { - modelFile: string; - defaultTune: number; - modelType: RVCModelType; - f0: boolean; - samplingRate: number; + modelFile: string; + defaultTune: number; + modelType: RVCModelType; + f0: boolean; + samplingRate: number; }; -export type ModelSlotUnion = - | RVCModelSlot - | MMVCv13ModelSlot - | MMVCv15ModelSlot - | SoVitsSvc40ModelSlot - | DDSPSVCModelSlot - | DiffusionSVCModelSlot - | BeatriceModelSlot - | LLVCModelSlot - | WebModelSlot; +export type ModelSlotUnion = RVCModelSlot | MMVCv13ModelSlot | MMVCv15ModelSlot | SoVitsSvc40ModelSlot | DDSPSVCModelSlot | DiffusionSVCModelSlot | BeatriceModelSlot | LLVCModelSlot | WebModelSlot; type ServerAudioDevice = { - kind: "audioinput" | "audiooutput"; - index: number; - name: string; - hostAPI: string; + kind: "audioinput" | "audiooutput"; + index: number; + name: string; + hostAPI: string; }; export type ServerInfo = VoiceChangerServerSetting & { - // コンフィグ対象外 (getInfoで取得のみ可能な情報) - status: string; - modelSlots: ModelSlotUnion[]; - serverAudioInputDevices: ServerAudioDevice[]; - serverAudioOutputDevices: ServerAudioDevice[]; - sampleModels: (RVCSampleModel | DiffusionSVCSampleModel)[]; - gpus: { - id: number; - name: string; - memory: number; - }[]; - maxInputLength: number; // MMVCv15 - voiceChangerParams: { - model_dir: string; - }; + // コンフィグ対象外 (getInfoで取得のみ可能な情報) + status: string; + modelSlots: ModelSlotUnion[]; + serverAudioInputDevices: ServerAudioDevice[]; + serverAudioOutputDevices: ServerAudioDevice[]; + sampleModels: (RVCSampleModel | DiffusionSVCSampleModel)[]; + gpus: { + id: number; + name: string; + memory: number; + }[]; + maxInputLength: number; // MMVCv15 + voiceChangerParams: { + model_dir: string; + }; }; export type SampleModel = { - id: string; - voiceChangerType: VoiceChangerType; - lang: string; - tag: string[]; - name: string; - modelUrl: string; - termsOfUseUrl: string; - icon: string; - credit: string; - description: string; - sampleRate: number; - modelType: string; - f0: boolean; + id: string; + voiceChangerType: VoiceChangerType; + lang: string; + tag: string[]; + name: string; + modelUrl: string; + termsOfUseUrl: string; + icon: string; + credit: string; + description: string; + sampleRate: number; + modelType: string; + f0: boolean; }; export type RVCSampleModel = SampleModel & { - indexUrl: string; - featureUrl: string; + indexUrl: string; + featureUrl: string; }; export type DiffusionSVCSampleModel = SampleModel & { - numOfDiffLayers: number; - numOfNativeLayers: number; - maxKStep: number; + numOfDiffLayers: number; + numOfNativeLayers: number; + maxKStep: number; }; export const DefaultServerSetting: ServerInfo = { - // VC Common - passThrough: false, - inputSampleRate: 48000, + // VC Common + passThrough: false, + inputSampleRate: 48000, - crossFadeOffsetRate: 0.0, - crossFadeEndRate: 1.0, - crossFadeOverlapSize: CrossFadeOverlapSize[1024], + crossFadeOffsetRate: 0.0, + crossFadeEndRate: 1.0, + crossFadeOverlapSize: CrossFadeOverlapSize[1024], - recordIO: 0, + recordIO: 0, - enableServerAudio: 0, - serverAudioStated: 0, - serverAudioSampleRate: 48000, - serverInputAudioSampleRate: 48000, - serverOutputAudioSampleRate: 48000, - serverMonitorAudioSampleRate: 48000, - serverInputAudioBufferSize: 1024 * 24, - serverOutputAudioBufferSize: 1024 * 24, - serverInputDeviceId: -1, - serverOutputDeviceId: -1, - serverMonitorDeviceId: -1, - serverReadChunkSize: 256, - serverInputAudioGain: 1.0, - serverOutputAudioGain: 1.0, - serverMonitorAudioGain: 1.0, + enableServerAudio: 0, + serverAudioStated: 0, + serverAudioSampleRate: 48000, + serverInputAudioSampleRate: 48000, + serverOutputAudioSampleRate: 48000, + serverMonitorAudioSampleRate: 48000, + serverInputAudioBufferSize: 1024 * 24, + serverOutputAudioBufferSize: 1024 * 24, + serverInputDeviceId: -1, + serverOutputDeviceId: -1, + serverMonitorDeviceId: -1, + serverReadChunkSize: 256, + serverInputAudioGain: 1.0, + serverOutputAudioGain: 1.0, + serverMonitorAudioGain: 1.0, - // VC Specific - srcId: 0, - dstId: 1, - gpu: 0, + // VC Specific + srcId: 0, + dstId: 1, + gpu: 0, - f0Factor: 1.0, - f0Detector: F0Detector.rmvpe_onnx, + f0Factor: 1.0, + f0Detector: F0Detector.rmvpe_onnx, - tran: 0, - noiseScale: 0, - predictF0: 0, - silentThreshold: 0, - extraConvertSize: 0, - clusterInferRatio: 0, + tran: 0, + noiseScale: 0, + predictF0: 0, + silentThreshold: 0, + extraConvertSize: 0, + clusterInferRatio: 0, - indexRatio: 0, - protect: 0.5, - rvcQuality: 0, - modelSamplingRate: 48000, - silenceFront: 1, - modelSlotIndex: 0, - sampleModels: [], - gpus: [], + indexRatio: 0, + protect: 0.5, + rvcQuality: 0, + modelSamplingRate: 48000, + silenceFront: 1, + modelSlotIndex: 0, + sampleModels: [], + gpus: [], - useEnhancer: 0, - useDiff: 1, - diffMethod: "dpm-solver", - useDiffSilence: 0, - diffAcc: 20, - diffSpkId: 1, - kStep: 120, - threshold: -45, + useEnhancer: 0, + useDiff: 1, + diffMethod: "dpm-solver", + useDiffSilence: 0, + diffAcc: 20, + diffSpkId: 1, + kStep: 120, + threshold: -45, - speedUp: 10, - skipDiffusion: 1, + speedUp: 10, + skipDiffusion: 1, - enableDirectML: 0, - // - status: "ok", - modelSlots: [], - serverAudioInputDevices: [], - serverAudioOutputDevices: [], + enableDirectML: 0, + // + status: "ok", + modelSlots: [], + serverAudioInputDevices: [], + serverAudioOutputDevices: [], - maxInputLength: 128 * 2048, - voiceChangerParams: { - model_dir: "", - }, + maxInputLength: 128 * 2048, + voiceChangerParams: { + model_dir: "", + }, }; /////////////////////// @@ -467,113 +453,109 @@ export const DefaultServerSetting: ServerInfo = { /////////////////////// export type WorkletSetting = { - numTrancateTreshold: number; - volTrancateThreshold: number; - volTrancateLength: number; + numTrancateTreshold: number; + volTrancateThreshold: number; + volTrancateLength: number; }; /////////////////////// // Worklet Nodeセッティング /////////////////////// export const Protocol = { - sio: "sio", - rest: "rest", - internal: "internal", + sio: "sio", + rest: "rest", + internal: "internal", } as const; export type Protocol = (typeof Protocol)[keyof typeof Protocol]; export const SendingSampleRate = { - "48000": 48000, - "44100": 44100, - "24000": 24000, + "48000": 48000, + "44100": 44100, + "24000": 24000, } as const; -export type SendingSampleRate = - (typeof SendingSampleRate)[keyof typeof SendingSampleRate]; +export type SendingSampleRate = (typeof SendingSampleRate)[keyof typeof SendingSampleRate]; export const DownSamplingMode = { - decimate: "decimate", - average: "average", + decimate: "decimate", + average: "average", } as const; -export type DownSamplingMode = - (typeof DownSamplingMode)[keyof typeof DownSamplingMode]; +export type DownSamplingMode = (typeof DownSamplingMode)[keyof typeof DownSamplingMode]; export type WorkletNodeSetting = { - serverUrl: string; - protocol: Protocol; - sendingSampleRate: SendingSampleRate; - inputChunkNum: number; - downSamplingMode: DownSamplingMode; + serverUrl: string; + protocol: Protocol; + sendingSampleRate: SendingSampleRate; + inputChunkNum: number; + downSamplingMode: DownSamplingMode; }; /////////////////////// // クライアントセッティング /////////////////////// export const SampleRate = { - "48000": 48000, + "48000": 48000, } as const; export type SampleRate = (typeof SampleRate)[keyof typeof SampleRate]; export type VoiceChangerClientSetting = { - audioInput: string | MediaStream | null; - sampleRate: SampleRate; // 48000Hz - echoCancel: boolean; - noiseSuppression: boolean; - noiseSuppression2: boolean; + audioInput: string | MediaStream | null; + sampleRate: SampleRate; // 48000Hz + echoCancel: boolean; + noiseSuppression: boolean; + noiseSuppression2: boolean; - inputGain: number; - outputGain: number; - monitorGain: number; + inputGain: number; + outputGain: number; + monitorGain: number; - passThroughConfirmationSkip: boolean; + passThroughConfirmationSkip: boolean; }; /////////////////////// // Client セッティング /////////////////////// export type ClientSetting = { - workletSetting: WorkletSetting; - workletNodeSetting: WorkletNodeSetting; - voiceChangerClientSetting: VoiceChangerClientSetting; + workletSetting: WorkletSetting; + workletNodeSetting: WorkletNodeSetting; + voiceChangerClientSetting: VoiceChangerClientSetting; }; export const DefaultClientSettng: ClientSetting = { - workletSetting: { - // numTrancateTreshold: 512 * 2, - numTrancateTreshold: 100, - volTrancateThreshold: 0.0005, - volTrancateLength: 32, - }, - workletNodeSetting: { - serverUrl: "", - protocol: "sio", - sendingSampleRate: 48000, - inputChunkNum: 192, - downSamplingMode: "average", - }, - voiceChangerClientSetting: { - audioInput: null, - sampleRate: 48000, - echoCancel: false, - noiseSuppression: false, - noiseSuppression2: false, - inputGain: 1.0, - outputGain: 1.0, - monitorGain: 1.0, - passThroughConfirmationSkip: false, - }, + workletSetting: { + // numTrancateTreshold: 512 * 2, + numTrancateTreshold: 100, + volTrancateThreshold: 0.0005, + volTrancateLength: 32, + }, + workletNodeSetting: { + serverUrl: "", + protocol: "sio", + sendingSampleRate: 48000, + inputChunkNum: 192, + downSamplingMode: "average", + }, + voiceChangerClientSetting: { + audioInput: null, + sampleRate: 48000, + echoCancel: false, + noiseSuppression: false, + noiseSuppression2: false, + inputGain: 1.0, + outputGain: 1.0, + monitorGain: 1.0, + passThroughConfirmationSkip: false, + }, }; //////////////////////////////////// // Exceptions //////////////////////////////////// export const VOICE_CHANGER_CLIENT_EXCEPTION = { - ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED", - ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE", - ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE", - ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED", - ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED: - "ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED", + ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED", + ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE", + ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE", + ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED", + ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED: "ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED", } as const; -export type VOICE_CHANGER_CLIENT_EXCEPTION = - (typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION]; +export type VOICE_CHANGER_CLIENT_EXCEPTION = (typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION]; //////////////////////////////////// // indexedDB @@ -582,23 +564,22 @@ export const INDEXEDDB_DB_APP_NAME = "INDEXEDDB_KEY_VOICE_CHANGER"; export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB"; export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT"; export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER"; -export const INDEXEDDB_KEY_MODEL_DATA = - "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA"; +export const INDEXEDDB_KEY_MODEL_DATA = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA"; // ONNX export type OnnxExporterInfo = { - status: string; - path: string; - filename: string; + status: string; + path: string; + filename: string; }; // Merge export type MergeElement = { - slotIndex: number; - strength: number; + slotIndex: number; + strength: number; }; export type MergeModelRequest = { - voiceChangerType: VoiceChangerType; - command: "mix"; - files: MergeElement[]; + voiceChangerType: VoiceChangerType; + command: "mix"; + files: MergeElement[]; }; diff --git a/server/voice_changer/EasyVC/EasyVC.py b/server/voice_changer/EasyVC/EasyVC.py index a5de4fb1..b1db617e 100644 --- a/server/voice_changer/EasyVC/EasyVC.py +++ b/server/voice_changer/EasyVC/EasyVC.py @@ -184,8 +184,8 @@ class EasyVC(VoiceChangerModel): logger.info("[Voice Changer] Pipeline is not initialized.") raise PipelineNotInitializedException() - enableTimer = True - with Timer2("infer_easyvc", enableTimer) as t: + enableTimer = False + with Timer2("infer-easyvc", enableTimer) as t: # 処理は16Kで実施(Pitch, embed, (infer)) receivedData = cast( @@ -203,7 +203,7 @@ class EasyVC(VoiceChangerModel): # 入力データ生成 data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame) - t.record("generate_input") + t.record("generate-input") audio = data[0] pitchf = data[1] diff --git a/server/voice_changer/EasyVC/pipeline/Pipeline.py b/server/voice_changer/EasyVC/pipeline/Pipeline.py index 6294cc02..98c5336b 100644 --- a/server/voice_changer/EasyVC/pipeline/Pipeline.py +++ b/server/voice_changer/EasyVC/pipeline/Pipeline.py @@ -138,7 +138,7 @@ class Pipeline(object): # print(f"pipeline exec input, audio:{audio.shape}, pitchf:{pitchf.shape}, feature:{feature.shape}") # print(f"pipeline exec input, silence_front:{silence_front}, out_size:{out_size}") - enablePipelineTimer = True + enablePipelineTimer = False with Timer2("Pipeline-Exec", enablePipelineTimer) as t: # NOQA # 16000のサンプリングレートで入ってきている。以降この世界は16000で処理。 # self.t_pad = self.sr * repeat # 1秒