voice-changer/client/lib/src/const.ts

507 lines
13 KiB
TypeScript
Raw Normal View History

2023-01-04 20:28:36 +03:00
// (★1) The chunk size is defined as 128 samples, i.e. 256 bytes (int16).
// (★2) 256 bytes (the number of samples decimated from the minimum buffer size of 256, x 2 bytes) are managed as one chunk.
// 24000 samples -> 1 sec, 128 samples (1 chunk) -> 5.333 msec
// 187.5 chunks -> 1 sec
2023-01-04 20:28:36 +03:00
2023-06-19 05:40:16 +03:00
/**
 * String constants enumerating the voice changer types.
 * Every key maps to an identical string value.
 */
export const VoiceChangerType = {
    MMVCv15: "MMVCv15",
    MMVCv13: "MMVCv13",
    "so-vits-svc-40": "so-vits-svc-40",
    "DDSP-SVC": "DDSP-SVC",
    RVC: "RVC",
} as const;

/** Union of the string values of the `VoiceChangerType` constant. */
export type VoiceChangerType = (typeof VoiceChangerType)[keyof typeof VoiceChangerType];
///////////////////////
// サーバセッティング
///////////////////////
/**
 * Input sample rates, keyed by their decimal string.
 * Keys stay quoted so that `keyof typeof InputSampleRate` remains a union of
 * string literals.
 */
export const InputSampleRate = {
    "48000": 48000,
    "44100": 44100,
    "24000": 24000,
} as const;

/** Union of the numeric values of the `InputSampleRate` constant. */
export type InputSampleRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
2023-04-07 22:39:04 +03:00
/**
 * Model sampling rates, keyed by their decimal string.
 * Keys stay quoted so that `keyof typeof ModelSamplingRate` remains a union of
 * string literals.
 */
export const ModelSamplingRate = {
    "48000": 48000,
    "40000": 40000,
    "32000": 32000,
} as const;

/**
 * Union of the numeric values of the `ModelSamplingRate` constant
 * (48000 | 40000 | 32000).
 *
 * The alias used to be derived from `InputSampleRate`, which produced
 * 48000 | 44100 | 24000 and therefore rejected 40000 and 32000 while accepting
 * rates this table does not contain.
 */
export type ModelSamplingRate = (typeof ModelSamplingRate)[keyof typeof ModelSamplingRate];
/**
 * Selectable cross-fade overlap sizes, keyed by their decimal string.
 */
export const CrossFadeOverlapSize = {
    "1024": 1024,
    "2048": 2048,
    "4096": 4096,
} as const;

/** Union of the numeric values of the `CrossFadeOverlapSize` constant. */
export type CrossFadeOverlapSize = (typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
2023-02-14 16:32:25 +03:00
/**
 * Names of the selectable F0 detectors.
 * Every key maps to an identical string value.
 */
export const F0Detector = {
    dio: "dio",
    harvest: "harvest",
    crepe: "crepe",
    crepe_full: "crepe_full",
    crepe_tiny: "crepe_tiny",
} as const;

/** Union of the string values of the `F0Detector` constant. */
export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector];
2023-05-13 19:04:29 +03:00
/**
 * Names of the selectable diff methods.
 * Every key maps to an identical string value.
 */
export const DiffMethod = {
    pndm: "pndm",
    "dpm-solver": "dpm-solver",
} as const;

/** Union of the string values of the `DiffMethod` constant. */
export type DiffMethod = (typeof DiffMethod)[keyof typeof DiffMethod];
2023-02-19 13:22:00 +03:00
2023-06-19 05:40:16 +03:00
/**
 * Names of the RVC model types.
 * Every key maps to an identical string value.
 */
export const RVCModelType = {
    pyTorchRVC: "pyTorchRVC",
    pyTorchRVCNono: "pyTorchRVCNono",
    pyTorchRVCv2: "pyTorchRVCv2",
    pyTorchRVCv2Nono: "pyTorchRVCv2Nono",
    pyTorchWebUI: "pyTorchWebUI",
    pyTorchWebUINono: "pyTorchWebUINono",
    onnxRVC: "onnxRVC",
    onnxRVCNono: "onnxRVCNono",
} as const;

/** Union of the string values of the `RVCModelType` constant. */
export type RVCModelType = (typeof RVCModelType)[keyof typeof RVCModelType];
2023-02-19 13:22:00 +03:00
/**
 * Names of the server setting keys.
 * Every key maps to an identical string value. The spellings (including
 * `serverAudioStated`) are runtime strings and are kept exactly as they were.
 */
export const ServerSettingKey = {
    srcId: "srcId",
    dstId: "dstId",
    gpu: "gpu",

    crossFadeOffsetRate: "crossFadeOffsetRate",
    crossFadeEndRate: "crossFadeEndRate",
    crossFadeOverlapSize: "crossFadeOverlapSize",

    framework: "framework",
    onnxExecutionProvider: "onnxExecutionProvider",

    f0Factor: "f0Factor",
    f0Detector: "f0Detector",
    recordIO: "recordIO",

    enableServerAudio: "enableServerAudio",
    serverAudioStated: "serverAudioStated",
    serverAudioSampleRate: "serverAudioSampleRate",
    serverInputAudioSampleRate: "serverInputAudioSampleRate",
    serverOutputAudioSampleRate: "serverOutputAudioSampleRate",
    serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate",
    serverInputAudioBufferSize: "serverInputAudioBufferSize",
    serverOutputAudioBufferSize: "serverOutputAudioBufferSize",
    serverInputDeviceId: "serverInputDeviceId",
    serverOutputDeviceId: "serverOutputDeviceId",
    serverMonitorDeviceId: "serverMonitorDeviceId",
    serverReadChunkSize: "serverReadChunkSize",
    serverInputAudioGain: "serverInputAudioGain",
    serverOutputAudioGain: "serverOutputAudioGain",

    tran: "tran",
    noiseScale: "noiseScale",
    predictF0: "predictF0",
    silentThreshold: "silentThreshold",
    extraConvertSize: "extraConvertSize",
    clusterInferRatio: "clusterInferRatio",

    indexRatio: "indexRatio",
    protect: "protect",
    rvcQuality: "rvcQuality",
    modelSamplingRate: "modelSamplingRate",
    silenceFront: "silenceFront",
    modelSlotIndex: "modelSlotIndex",

    useEnhancer: "useEnhancer",
    useDiff: "useDiff",
    // "useDiffDpm": "useDiffDpm",
    diffMethod: "diffMethod",
    useDiffSilence: "useDiffSilence",
    diffAcc: "diffAcc",
    diffSpkId: "diffSpkId",
    kStep: "kStep",
    threshold: "threshold",

    inputSampleRate: "inputSampleRate",
    enableDirectML: "enableDirectML",
} as const;

/** Union of the string values of the `ServerSettingKey` constant. */
export type ServerSettingKey = (typeof ServerSettingKey)[keyof typeof ServerSettingKey];
/**
 * Server-side settings of the voice changer.
 *
 * Every field name here also appears in `ServerSettingKey`; that table
 * additionally lists `framework` and `onnxExecutionProvider`, which this type
 * does not declare. Flags are numbers, not booleans (0: off, 1: on, where noted).
 */
export type VoiceChangerServerSetting = {
    srcId: number;
    dstId: number;
    gpu: number;

    crossFadeOffsetRate: number;
    crossFadeEndRate: number;
    crossFadeOverlapSize: CrossFadeOverlapSize;

    f0Factor: number;
    f0Detector: F0Detector; // one of the F0Detector values
    recordIO: number; // 0:off, 1:on

    enableServerAudio: number; // 0:off, 1:on
    serverAudioStated: number; // 0:off, 1:on
    serverAudioSampleRate: number;
    serverInputAudioSampleRate: number;
    serverOutputAudioSampleRate: number;
    serverMonitorAudioSampleRate: number;
    serverInputAudioBufferSize: number;
    serverOutputAudioBufferSize: number;
    serverInputDeviceId: number;
    serverOutputDeviceId: number;
    serverMonitorDeviceId: number;
    serverReadChunkSize: number;
    serverInputAudioGain: number;
    serverOutputAudioGain: number;

    tran: number; // so-vits-svc
    noiseScale: number; // so-vits-svc
    predictF0: number; // so-vits-svc
    silentThreshold: number; // so-vits-svc
    extraConvertSize: number; // so-vits-svc
    clusterInferRatio: number; // so-vits-svc

    indexRatio: number; // RVC
    protect: number; // RVC
    rvcQuality: number; // 0:low, 1:high
    silenceFront: number; // 0:off, 1:on
    modelSamplingRate: ModelSamplingRate; // 32000,40000,48000
    modelSlotIndex: number;

    useEnhancer: number; // DDSP-SVC
    useDiff: number; // DDSP-SVC
    // useDiffDpm: number// DDSP-SVC
    diffMethod: DiffMethod; // DDSP-SVC
    useDiffSilence: number; // DDSP-SVC
    diffAcc: number; // DDSP-SVC
    diffSpkId: number; // DDSP-SVC
    kStep: number; // DDSP-SVC
    threshold: number; // DDSP-SVC

    inputSampleRate: InputSampleRate;
    enableDirectML: number;
};
/**
 * Fields shared by every model slot type; the concrete slot types in this
 * file are intersections of this type with their own fields.
 */
type ModelSlot = {
    voiceChangerType: VoiceChangerType;
    name: string;
    description: string;
    credit: string;
    termsOfUseUrl: string;
    iconFile: string;
    /** Numeric key to string map (presumably speaker id to speaker name — confirm). */
    speakers: { [key: number]: string };
};
/** Model slot for an RVC model: the common `ModelSlot` fields plus the RVC-specific ones below. */
export type RVCModelSlot = ModelSlot & {
    modelFile: string;
    indexFile: string;
    defaultIndexRatio: number;
    defaultProtect: number;
    defaultTune: number;
    modelType: RVCModelType;
    embChannels: number;
    f0: boolean;
    samplingRate: number;
    deprecated: boolean;
};
2023-06-20 21:32:18 +03:00
/** Model slot for an MMVCv13 model: the common `ModelSlot` fields plus the ones below. */
export type MMVCv13ModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    srcId: number;
    dstId: number;
    samplingRate: number;
    // Repeats the `speakers` field of ModelSlot with the same type.
    speakers: { [key: number]: string };
};
/** Model slot for an MMVCv15 model: the common `ModelSlot` fields plus the ones below. */
export type MMVCv15ModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    srcId: number;
    dstId: number;
    f0Factor: number;
    samplingRate: number;
    /** Numeric key to number map (presumably a per-speaker F0 value — confirm). */
    f0: { [key: number]: number };
};
/** Model slot for a so-vits-svc-40 model: the common `ModelSlot` fields plus the ones below. */
export type SoVitsSvc40ModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    clusterFile: string;
    dstId: number;
    samplingRate: number;
    defaultTune: number;
    defaultClusterInferRatio: number;
    noiseScale: number;
    // Repeats the `speakers` field of ModelSlot with the same type.
    speakers: { [key: number]: string };
};
/** Model slot for a DDSP-SVC model: the common `ModelSlot` fields plus the ones below. */
export type DDSPSVCModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    diffModelFile: string;
    diffConfigFile: string;
    dstId: number;
    samplingRate: number;
    defaultTune: number;
    enhancer: boolean;
    diffusion: boolean;
    acc: number;
    kstep: number;
    // Repeats the `speakers` field of ModelSlot with the same type.
    speakers: { [key: number]: string };
};
/** Any one of the concrete model slot types declared in this file. */
export type ModelSlotUnion =
    | RVCModelSlot
    | MMVCv13ModelSlot
    | MMVCv15ModelSlot
    | SoVitsSvc40ModelSlot
    | DDSPSVCModelSlot;
2023-06-19 05:40:16 +03:00
2023-05-06 22:18:18 +03:00
/** Description of one server audio device, used for both input and output device lists. */
type ServerAudioDevice = {
    /** Whether the entry is an input or an output device. */
    kind: "audioinput" | "audiooutput";
    index: number;
    name: string;
    hostAPI: string;
};
/**
 * The server settings extended with fields that are not configuration targets
 * (per the original note: information that can only be obtained via getInfo).
 */
export type ServerInfo = VoiceChangerServerSetting & {
    // Not subject to configuration (information obtainable only via getInfo).
    status: string;
    modelSlots: ModelSlotUnion[];
    serverAudioInputDevices: ServerAudioDevice[];
    serverAudioOutputDevices: ServerAudioDevice[];
    sampleModels: RVCSampleModel[];
    gpus: {
        id: number;
        name: string;
        memory: number;
    }[];
    maxInputLength: number; // MMVCv15
};
2023-04-20 11:17:43 +03:00
2023-05-16 04:38:23 +03:00
/** Metadata describing one RVC sample model (element type of `ServerInfo.sampleModels`). */
export type RVCSampleModel = {
    id: string;
    name: string;
    modelUrl: string;
    indexUrl: string;
    featureUrl: string;
    termsOfUseUrl: string;
    credit: string;
    description: string;
    lang: string;
    tag: string[];
    icon: string;
    f0: boolean;
    sampleRate: number;
    /** Plain string here, not the `RVCModelType` union. */
    modelType: string;
};
2023-04-20 11:17:43 +03:00
/**
 * Default `ServerInfo` value: defaults for every server setting plus empty or
 * placeholder values for the information-only fields.
 * Property order is kept exactly as it was.
 */
export const DefaultServerSetting: ServerInfo = {
    // VC Common
    inputSampleRate: 48000,

    crossFadeOffsetRate: 0.0,
    crossFadeEndRate: 1.0,
    crossFadeOverlapSize: CrossFadeOverlapSize[1024],

    recordIO: 0,

    enableServerAudio: 0,
    serverAudioStated: 0,
    serverAudioSampleRate: 48000,
    serverInputAudioSampleRate: 48000,
    serverOutputAudioSampleRate: 48000,
    serverMonitorAudioSampleRate: 48000,
    serverInputAudioBufferSize: 1024 * 24,
    serverOutputAudioBufferSize: 1024 * 24,
    serverInputDeviceId: -1,
    serverOutputDeviceId: -1,
    serverMonitorDeviceId: -1,
    serverReadChunkSize: 256,
    serverInputAudioGain: 1.0,
    serverOutputAudioGain: 1.0,

    // VC Specific
    srcId: 0,
    dstId: 1,
    gpu: 0,

    f0Factor: 1.0,
    f0Detector: F0Detector.dio,

    tran: 0,
    noiseScale: 0,
    predictF0: 0,
    silentThreshold: 0,
    extraConvertSize: 0,
    clusterInferRatio: 0,

    indexRatio: 0,
    protect: 0.5,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    silenceFront: 1,
    modelSlotIndex: 0,
    sampleModels: [],
    gpus: [],

    useEnhancer: 0,
    useDiff: 1,
    diffMethod: "dpm-solver",
    useDiffSilence: 0,
    diffAcc: 20,
    diffSpkId: 1,
    kStep: 120,
    threshold: -45,

    enableDirectML: 0,
    //
    status: "ok",
    modelSlots: [],
    serverAudioInputDevices: [],
    serverAudioOutputDevices: [],
    maxInputLength: 128 * 2048,
};
2023-04-05 22:03:09 +03:00
///////////////////////
// Workletセッティング
///////////////////////
2023-01-11 22:52:01 +03:00
/**
 * Settings for the worklet.
 * The property spellings ("Trancate", "Treshold") are part of the public shape
 * and are kept as they are.
 */
export type WorkletSetting = {
    numTrancateTreshold: number;
    volTrancateThreshold: number;
    volTrancateLength: number;
};
2023-02-19 08:20:37 +03:00
///////////////////////
// Worklet Node settings
///////////////////////
2023-01-05 05:45:42 +03:00
/**
 * Names of the protocols selectable in `WorkletNodeSetting.protocol`.
 * Every key maps to an identical string value.
 */
export const Protocol = {
    sio: "sio",
    rest: "rest",
} as const;

/** Union of the string values of the `Protocol` constant. */
export type Protocol = (typeof Protocol)[keyof typeof Protocol];
2023-01-04 20:28:36 +03:00
2023-02-19 08:20:37 +03:00
/**
 * Sample rates selectable in `WorkletNodeSetting.sendingSampleRate`, keyed by
 * their decimal string. Keys stay quoted so that
 * `keyof typeof SendingSampleRate` remains a union of string literals.
 */
export const SendingSampleRate = {
    "48000": 48000,
    "44100": 44100,
    "24000": 24000,
} as const;

/** Union of the numeric values of the `SendingSampleRate` constant. */
export type SendingSampleRate = (typeof SendingSampleRate)[keyof typeof SendingSampleRate];
2023-01-04 20:28:36 +03:00
2023-02-14 16:32:25 +03:00
/**
 * Names of the down-sampling modes selectable in
 * `WorkletNodeSetting.downSamplingMode`.
 * Every key maps to an identical string value.
 */
export const DownSamplingMode = {
    decimate: "decimate",
    average: "average",
} as const;

/** Union of the string values of the `DownSamplingMode` constant. */
export type DownSamplingMode = (typeof DownSamplingMode)[keyof typeof DownSamplingMode];
2023-02-19 08:20:37 +03:00
2023-02-19 20:21:51 +03:00
/** Settings for the worklet node. */
export type WorkletNodeSetting = {
    serverUrl: string;
    protocol: Protocol;
    sendingSampleRate: SendingSampleRate;
    inputChunkNum: number;
    downSamplingMode: DownSamplingMode;
};
2023-04-05 22:03:09 +03:00
2023-02-19 08:20:37 +03:00
///////////////////////
// クライアントセッティング
///////////////////////
2023-01-04 20:28:36 +03:00
/**
 * Client sample rates, keyed by their decimal string; 48000 is the only entry.
 */
export const SampleRate = {
    "48000": 48000,
} as const;

/** Union of the numeric values of the `SampleRate` constant (currently just 48000). */
export type SampleRate = (typeof SampleRate)[keyof typeof SampleRate];
2023-02-19 08:20:37 +03:00
/** Client-side settings of the voice changer. */
export type VoiceChangerClientSetting = {
    /** A string, a `MediaStream`, or `null`. */
    audioInput: string | MediaStream | null;
    sampleRate: SampleRate; // 48000Hz
    echoCancel: boolean;
    noiseSuppression: boolean;
    noiseSuppression2: boolean;

    inputGain: number;
    outputGain: number;
};
2023-01-05 05:45:42 +03:00
2023-06-25 10:39:18 +03:00
///////////////////////
// Client セッティング
///////////////////////
/** Aggregate of the three client-side setting groups. */
export type ClientSetting = {
    workletSetting: WorkletSetting;
    workletNodeSetting: WorkletNodeSetting;
    voiceChangerClientSetting: VoiceChangerClientSetting;
};
/**
 * Default `ClientSetting` value.
 *
 * The exported name is misspelled ("Settng"); it is kept unchanged because
 * callers import it under this name. Use `DefaultClientSetting` in new code.
 */
export const DefaultClientSettng: ClientSetting = {
    workletSetting: {
        numTrancateTreshold: 100,
        volTrancateThreshold: 0.0005,
        volTrancateLength: 32,
    },
    workletNodeSetting: {
        serverUrl: "",
        protocol: "sio",
        sendingSampleRate: 48000,
        inputChunkNum: 48,
        downSamplingMode: "average",
    },
    voiceChangerClientSetting: {
        audioInput: null,
        sampleRate: 48000,
        echoCancel: false,
        noiseSuppression: false,
        noiseSuppression2: false,
        inputGain: 1.0,
        outputGain: 1.0,
    },
};

/** Correctly spelled alias; refers to the very same object as `DefaultClientSettng`. */
export const DefaultClientSetting: ClientSetting = DefaultClientSettng;
2023-01-07 14:07:39 +03:00
2023-02-19 08:20:37 +03:00
////////////////////////////////////
// Exceptions
////////////////////////////////////
2023-01-05 05:45:42 +03:00
/**
 * Exception codes of the voice changer client.
 * Every key maps to an identical string value.
 */
export const VOICE_CHANGER_CLIENT_EXCEPTION = {
    ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED",
    ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
    ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE",
    ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED",
} as const;

/** Union of the string values of the `VOICE_CHANGER_CLIENT_EXCEPTION` constant. */
export type VOICE_CHANGER_CLIENT_EXCEPTION =
    (typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
2023-01-04 20:28:36 +03:00
2023-01-29 03:42:45 +03:00
////////////////////////////////////
// indexedDB
////////////////////////////////////
2023-03-02 03:56:42 +03:00
// String constants for the indexedDB section; the values are runtime strings
// and are kept exactly as they were.
export const INDEXEDDB_DB_APP_NAME = "INDEXEDDB_KEY_VOICE_CHANGER";
export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB";
export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT";
export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER";
export const INDEXEDDB_KEY_MODEL_DATA = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA";
2023-04-13 02:00:28 +03:00
// ONNX
/** Shape of the ONNX exporter information: three string fields. */
export type OnnxExporterInfo = {
    status: string;
    path: string;
    filename: string;
};
2023-04-30 20:34:01 +03:00
// Merge
/** One entry of a merge request: a file name paired with a numeric strength. */
export type MergeElement = {
    filename: string;
    strength: number;
};
/** Request to merge models: the voice changer type, the fixed command "mix", and the files to merge. */
export type MergeModelRequest = {
    voiceChangerType: VoiceChangerType;
    /** Always the literal "mix". */
    command: "mix";
    files: MergeElement[];
};