diff --git a/client/demo/src/107_advanced_setting.tsx b/client/demo/src/107_advanced_setting.tsx
index e3fb1895..e42b3f2a 100644
--- a/client/demo/src/107_advanced_setting.tsx
+++ b/client/demo/src/107_advanced_setting.tsx
@@ -1,4 +1,4 @@
-import { BufferSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
+import { BufferSize, CrossFadeOverlapSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
 import React, { useMemo } from "react"
 import { useAppState } from "./001_provider/001_AppStateProvider";
 import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
@@ -164,6 +164,26 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
         )
     }, [appState.serverSetting.setting.crossFadeOverlapRate, appState.serverSetting.setCrossFadeOverlapRate])
 
+
+    const crossFadeOverlapSizeRow = useMemo(() => {
+        return (
+            <div className="body-row split-3-7 left-padding-1 guided">
+                <div className="body-item-title left-padding-1">Cross Fade Overlap Size</div>
+                <div className="body-input-container">
+                    <select className="body-select" value={appState.serverSetting.setting.crossFadeOverlapSize} onChange={(e) => {
+                        appState.serverSetting.setCrossFadeOverlapSize(Number(e.target.value))
+                    }}>
+                        {
+                            Object.values(CrossFadeOverlapSize).map(x => {
+                                return <option key={x} value={x}>{x}</option>
+                            })
+                        }
+                    </select>
+                </div>
+            </div>
+        )
+    }, [appState.serverSetting.setting.crossFadeOverlapSize, appState.serverSetting.setCrossFadeOverlapSize])
+
     const crossFadeOffsetRateRow = useMemo(() => {
         return (
             <div className="body-row split-3-7 left-padding-1 guided">
@@ -290,6 +310,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
                 {convertChunkNumRow}
                 {minConvertSizeRow}
                 {crossFadeOverlapRateRow}
+                {crossFadeOverlapSizeRow}
                 {crossFadeOffsetRateRow}
                 {crossFadeEndRateRow}
@@ -301,7 +322,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
         )
-    }, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
+    }, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOverlapSizeRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
 
     const advancedSetting = useMemo(() => {
diff --git a/client/lib/src/VoiceChangerClient.ts b/client/lib/src/VoiceChangerClient.ts
index 38fa9495..72f7bfaa 100644
--- a/client/lib/src/VoiceChangerClient.ts
+++ b/client/lib/src/VoiceChangerClient.ts
@@ -259,9 +259,14 @@ export class VoiceChangerClient {
         this.audioStreamer.setSendingSampleRate(val)
     }
 
-    // configure worklet
+
+    /////////////////////////////////////////////////////
+    // Component configuration and operations
+    /////////////////////////////////////////////////////
+    //## Server ##//
+
+    //## Worklet ##//
     configureWorklet = (setting: WorkletSetting) => {
-        console.log("configureWorklet", setting)
         this.vcNode.configure(setting)
     }
     startOutputRecordingWorklet = () => {
diff --git a/client/lib/src/const.ts b/client/lib/src/const.ts
index fb32dc56..ab2069ca 100644
--- a/client/lib/src/const.ts
+++ b/client/lib/src/const.ts
@@ -7,7 +7,10 @@
 
 // types
 export type VoiceChangerServerSetting = {
-    convertChunkNum: number, // Conversion size fed into VITS. (Specify at least twice the input size; if a smaller value is given, the server automatically uses twice the input size.)
+
+    // Conversion size fed into VITS. (Specify at least twice the input size; if a smaller
+    // value is given, the server automatically uses twice the input size.)
+    convertChunkNum: number,
     minConvertSize: number, // Inputs smaller than this value are padded up to it.
 
     srcId: number,
@@ -18,6 +21,7 @@ export type VoiceChangerServerSetting = {
     crossFadeOffsetRate: number,
     crossFadeEndRate: number,
     crossFadeOverlapRate: number,
+    crossFadeOverlapSize: number,
 
     framework: Framework
     onnxExecutionProvider: OnnxExecutionProvider,
@@ -76,6 +80,7 @@ export type ServerInfo = {
     crossFadeOffsetRate: number,
     crossFadeEndRate: number,
     crossFadeOverlapRate: number,
+    crossFadeOverlapSize: number,
     gpu: number,
     srcId: number,
     dstId: number,
@@ -160,15 +165,22 @@ export type OnnxExecutionProvider = typeof OnnxExecutionProvider[keyof typeof On
 export const Framework = {
     "PyTorch": "PyTorch",
     "ONNX": "ONNX",
-}
+} as const
 export type Framework = typeof Framework[keyof typeof Framework]
 
 export const F0Detector = {
     "dio": "dio",
     "harvest": "harvest",
-}
+} as const
 export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
 
+export const CrossFadeOverlapSize = {
+    "1024": 1024,
+    "2048": 2048,
+    "4096": 4096,
+} as const
+export type CrossFadeOverlapSize = typeof CrossFadeOverlapSize[keyof typeof CrossFadeOverlapSize]
+
 export const ServerSettingKey = {
     "srcId": "srcId",
     "dstId": "dstId",
@@ -178,6 +190,7 @@ export const ServerSettingKey = {
     "crossFadeOffsetRate": "crossFadeOffsetRate",
     "crossFadeEndRate": "crossFadeEndRate",
     "crossFadeOverlapRate": "crossFadeOverlapRate",
+    "crossFadeOverlapSize": "crossFadeOverlapSize",
     "framework": "framework",
     "onnxExecutionProvider": "onnxExecutionProvider",
     "f0Factor": "f0Factor",
@@ -199,6 +212,7 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
     crossFadeOffsetRate: 0.1,
     crossFadeEndRate: 0.9,
     crossFadeOverlapRate: 0.5,
+    crossFadeOverlapSize: CrossFadeOverlapSize[4096],
     framework: "PyTorch",
     f0Factor: 1.0,
     onnxExecutionProvider: "CPUExecutionProvider",
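Note on the `as const` additions above: without it, TypeScript widens the object values, so the derived `typeof X[keyof typeof X]` types collapse to plain `string`/`number`. A minimal standalone sketch of the pattern (not repo code, same shape as `CrossFadeOverlapSize`):

```typescript
// `as const` freezes the values as literal types, so the derived type is the
// union 1024 | 2048 | 4096 rather than the widened `number`.
export const CrossFadeOverlapSize = {
    "1024": 1024,
    "2048": 2048,
    "4096": 4096,
} as const
export type CrossFadeOverlapSize = typeof CrossFadeOverlapSize[keyof typeof CrossFadeOverlapSize] // 1024 | 2048 | 4096

const ok: CrossFadeOverlapSize = 2048      // compiles
// const bad: CrossFadeOverlapSize = 3000  // rejected at compile time
```

The same reasoning explains why the diff also adds `as const` to the existing `Framework` and `F0Detector` objects.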
diff --git a/client/lib/src/hooks/useClient.ts b/client/lib/src/hooks/useClient.ts
index 5bb95585..57e0c5b9 100644
--- a/client/lib/src/hooks/useClient.ts
+++ b/client/lib/src/hooks/useClient.ts
@@ -107,7 +107,7 @@ export const useClient = (props: UseClientProps): ClientState => {
     const getInfo = useMemo(() => {
         return async () => {
             await initializedPromise
-            await clientSetting.reloadClientSetting()
+            await clientSetting.reloadClientSetting() // effectively a no-op
             await serverSetting.reloadServerInfo()
         }
     }, [clientSetting, serverSetting])
diff --git a/client/lib/src/hooks/useServerSetting.ts b/client/lib/src/hooks/useServerSetting.ts
index f6fb8253..d68b5753 100644
--- a/client/lib/src/hooks/useServerSetting.ts
+++ b/client/lib/src/hooks/useServerSetting.ts
@@ -48,6 +48,7 @@ export type ServerSettingState = {
     setCrossFadeOffsetRate: (num: number) => Promise<boolean>;
     setCrossFadeEndRate: (num: number) => Promise<boolean>;
     setCrossFadeOverlapRate: (num: number) => Promise<boolean>;
+    setCrossFadeOverlapSize: (num: number) => Promise<boolean>;
     setF0Factor: (num: number) => Promise<boolean>;
     setF0Detector: (val: string) => Promise<boolean>;
     setRecordIO: (num: number) => Promise<boolean>;
@@ -69,7 +70,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
 
     const { setItem, getItem, removeItem } = useIndexedDB()
 
-    // Initialization step 1: load from the DB
+    // Load settings from the DB (initialization from cache)
     useEffect(() => {
         const loadCache = async () => {
             const setting = await getItem(INDEXEDDB_KEY_SERVER)
@@ -88,7 +89,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
         loadCache()
     }, [])
 
-    // Initialization step 2: apply to the client
+
+    // Apply settings to the client (initialization and updates)
     useEffect(() => {
         if (!props.voiceChangerClient) return
         props.voiceChangerClient.updateServerSettings(ServerSettingKey.framework, setting.framework)
@@ -106,7 +108,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
 
         props.voiceChangerClient.updateServerSettings(ServerSettingKey.recordIO, "" + setting.recordIO)
 
-    }, [props.voiceChangerClient])
+        // setting["convertChunkNum"] = 1
+        // const a = "convertChunkNum"
+        // setting[a] = ""
+
+    }, [props.voiceChangerClient, setting])
 
     //////////////
     // Settings
     //////////////
@@ -129,6 +135,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                 crossFadeOffsetRate: res.crossFadeOffsetRate,
                 crossFadeEndRate: res.crossFadeEndRate,
                 crossFadeOverlapRate: res.crossFadeOverlapRate,
+                crossFadeOverlapSize: res.crossFadeOverlapSize,
                 framework: res.framework,
                 onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
                 f0Factor: res.f0Factor,
@@ -146,6 +153,47 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
         }
     }
 
+    // // New Trial
+    // // Settings: _setSetting triggers the useEffect, which pushes the settings out
+    // const setSetting = useMemo(() => {
+    //     return (setting: ) => {
+
+
+    //         if (!props.voiceChangerClient) return false
+
+    //         const res = await props.voiceChangerClient.updateServerSettings(key, "" + newVal)
+
+    //         _setServerInfo(res)
+    //         if (newVal == res[key]) {
+    //             const newSetting: VoiceChangerServerSetting = {
+    //                 ...settingRef.current,
+    //                 convertChunkNum: res.convertChunkNum,
+    //                 minConvertSize: res.minConvertSize,
+    //                 srcId: res.srcId,
+    //                 dstId: res.dstId,
+    //                 gpu: res.gpu,
+    //                 crossFadeOffsetRate: res.crossFadeOffsetRate,
+    //                 crossFadeEndRate: res.crossFadeEndRate,
+    //                 crossFadeOverlapRate: res.crossFadeOverlapRate,
+    //                 crossFadeOverlapSize: res.crossFadeOverlapSize,
+    //                 framework: res.framework,
+    //                 onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
+    //                 f0Factor: res.f0Factor,
+    //                 f0Detector: res.f0Detector,
+    //                 recordIO: res.recordIO
+
+    //             }
+    //             _setSetting(newSetting)
+    //             setItem(INDEXEDDB_KEY_SERVER, newSetting)
+    //             return true
+    //         } else {
+    //             alert(`[ServerSetting] The setting was not applied ([key:${key}, new:${newVal}, res:${res[key]}]). When switching models the processing is asynchronous, so it may only look as if the change was not applied. Press the reload button in the server control to refresh the GUI.`)
+    //             return false
+    //         }
+
+    //     }
+    // }, [props.voiceChangerClient])
+
     const setFramework = useMemo(() => {
         return async (framework: Framework) => {
             return await _set_and_store(ServerSettingKey.framework, "" + framework)
@@ -204,6 +252,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
             return await _set_and_store(ServerSettingKey.crossFadeOverlapRate, "" + num)
         }
     }, [props.voiceChangerClient])
+    const setCrossFadeOverlapSize = useMemo(() => {
+        return async (num: number) => {
+            return await _set_and_store(ServerSettingKey.crossFadeOverlapSize, "" + num)
+        }
+    }, [props.voiceChangerClient])
+
     const setF0Factor = useMemo(() => {
         return async (num: number) => {
@@ -370,6 +424,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
         setCrossFadeOffsetRate,
         setCrossFadeEndRate,
         setCrossFadeOverlapRate,
+        setCrossFadeOverlapSize,
         setF0Factor,
         setF0Detector,
         setRecordIO,
diff --git a/client/lib/src/hooks/useWorkletSetting.ts b/client/lib/src/hooks/useWorkletSetting.ts
index 8c509d86..dfabbe2c 100644
--- a/client/lib/src/hooks/useWorkletSetting.ts
+++ b/client/lib/src/hooks/useWorkletSetting.ts
@@ -18,7 +18,7 @@ export type WorkletSettingState = {
 export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSettingState => {
     const [setting, _setSetting] = useState<WorkletSetting>(DefaultWorkletSetting)
     const { setItem, getItem, removeItem } = useIndexedDB()
-    // Initialization step 1: load from the DB
+    // Load settings from the DB (initialization from cache)
     useEffect(() => {
         const loadCache = async () => {
             const setting = await getItem(INDEXEDDB_KEY_WORKLET)
@@ -48,7 +48,7 @@ export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSetting
         loadCache()
     }, [])
 
-    // Set on the client: initialization, setting changes
+    // Apply settings to the client (initialization and updates)
     useEffect(() => {
         if (!props.voiceChangerClient) return
         props.voiceChangerClient.configureWorklet(setting)
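All of the setters above funnel writes through `_set_and_store`: push one key/value to the server, compare the value echoed back in the response, and persist to IndexedDB only on success. A hedged sketch of that flow, with the server and cache calls stubbed (the names here are illustrative stand-ins, not the repo API):

```typescript
// Sketch of the "set, verify echo, then cache" pattern behind _set_and_store.
type ServerInfo = Record<string, string | number>

// Stub: pretend the server accepted the value and echoes its state back.
const sendToServer = async (key: string, val: string): Promise<ServerInfo> => ({ [key]: val })
// Stub: stand-in for the useIndexedDB().setItem call.
const saveToCache = async (key: string, value: unknown): Promise<void> => { void key; void value }

const setAndStore = async (key: string, newVal: string): Promise<boolean> => {
    const res = await sendToServer(key, newVal)   // server echoes its actual state
    if (String(res[key]) === newVal) {
        await saveToCache("server-setting", res)  // persist only confirmed values
        return true
    }
    // Model switches are asynchronous server-side, so the echoed value can lag;
    // the real hook alerts the user and suggests a reload in that case.
    return false
}

setAndStore("crossFadeOverlapSize", "4096").then(ok => console.log("applied:", ok))
```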
diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py
index f00bf5b4..36bef690 100755
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@@ -91,7 +91,7 @@ class VocieChangerSettings():
     dstId: int = 100
     crossFadeOffsetRate: float = 0.1
     crossFadeEndRate: float = 0.9
-    crossFadeOverlapRate: float = 0.9
+    crossFadeOverlapSize: int = 4096
     convertChunkNum: int = 32
     minConvertSize: int = 0
     framework: str = "PyTorch"  # PyTorch or ONNX
@@ -106,8 +106,8 @@ class VocieChangerSettings():
     configFile: str = ""
 
     # List only the mutable fields below
-    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate"]
-    floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
+    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate", "crossFadeOverlapSize"]
+    floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "f0Factor"]
     strData = ["framework", "f0Detector", "serverMicProps"]
@@ -125,7 +125,7 @@ class VoiceChanger():
         self.onnx_session = None
         self.currentCrossFadeOffsetRate = 0
         self.currentCrossFadeEndRate = 0
-        self.currentCrossFadeOverlapRate = 0
+        self.currentCrossFadeOverlapSize = 0
 
         self.gpu_num = torch.cuda.device_count()
         self.text_norm = torch.LongTensor([0, 6, 0])
@@ -324,14 +324,17 @@ class VoiceChanger():
 
     def _generate_strength(self, unpackedData):
 
-        if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapRate != self.settings.crossFadeOverlapRate:
+        if self.unpackedData_length != unpackedData.shape[0] or \
+                self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
+                self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
+                self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
+
             self.unpackedData_length = unpackedData.shape[0]
             self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
             self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
-            self.currentCrossFadeOverlapRate = self.settings.crossFadeOverlapRate
-
-            overlapSize = int(unpackedData.shape[0] * self.settings.crossFadeOverlapRate)
+            self.currentCrossFadeOverlapSize = self.settings.crossFadeOverlapSize
+            overlapSize = min(self.settings.crossFadeOverlapSize, self.unpackedData_length)
 
             cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate)
             cf_end = int(overlapSize * self.settings.crossFadeEndRate)
             cf_range = cf_end - cf_offset
@@ -413,7 +416,7 @@ class VoiceChanger():
                 "sid_tgt": sid_tgt1.numpy()
             })[0][0, 0] * self.hps.data.max_wav_value
             if hasattr(self, 'np_prev_audio1') == True:
-                overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
+                overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
                 prev_overlap = self.np_prev_audio1[-1 * overlapSize:]
                 cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
                 # print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
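For context on `_generate_strength`: the fade envelopes are flat up to `overlapSize * crossFadeOffsetRate`, ramp until `overlapSize * crossFadeEndRate`, and are flat afterwards, and they are rebuilt only when the block size or a crossfade parameter changes. The exact curve shape is not visible in this diff, so the sketch below assumes a linear ramp; all names are illustrative (TypeScript standing in for the numpy original):

```typescript
// Hedged sketch of the strength-envelope logic in _generate_strength
// (assumes a linear ramp; the actual curve is not shown in this diff).
const generateStrength = (blockLength: number, overlapSizeSetting: number, offsetRate: number, endRate: number) => {
    const overlapSize = Math.min(overlapSizeSetting, blockLength) // the key change in this diff
    const cfOffset = Math.floor(overlapSize * offsetRate)
    const cfEnd = Math.floor(overlapSize * endRate)
    const cfRange = cfEnd - cfOffset

    const curStrength = new Float32Array(overlapSize)  // fade-in for the new block
    const prevStrength = new Float32Array(overlapSize) // fade-out for the previous block
    for (let i = 0; i < overlapSize; i++) {
        const t = i < cfOffset ? 0 : i >= cfEnd ? 1 : (i - cfOffset) / cfRange
        curStrength[i] = t
        prevStrength[i] = 1 - t // the two envelopes sum to 1 everywhere
    }
    return { prevStrength, curStrength }
}

const { prevStrength, curStrength } = generateStrength(5120, 4096, 0.1, 0.9)
console.log(prevStrength.length, curStrength[0], curStrength[curStrength.length - 1]) // 4096 0 1
```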
@@ -454,7 +457,7 @@ class VoiceChanger():
             self.cur_strength = self.cur_strength.cpu()
 
             if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'):  # If prev_audio1 is not on the desired device, skip this round.
-                overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
+                overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
                 prev_overlap = self.prev_audio1[-1 * overlapSize:]
                 cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
                 powered_prev = prev_overlap * self.prev_strength
@@ -495,13 +498,16 @@ class VoiceChanger():
             self.cur_strength = self.cur_strength.cuda(self.settings.gpu)
 
             if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.settings.gpu):
-                overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
+                overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
                 prev_overlap = self.prev_audio1[-1 * overlapSize:]
                 cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
                 powered_prev = prev_overlap * self.prev_strength
                 powered_cur = cur_overlap * self.cur_strength
                 powered_result = powered_prev + powered_cur
 
+                print(overlapSize, prev_overlap.shape, cur_overlap.shape, self.prev_strength.shape, self.cur_strength.shape)
+                print(self.prev_audio1.shape, audio1.shape, inputSize, overlapSize)
+
                 cur = audio1[-1 * inputSize:-1 * overlapSize]  # Raw part of the current input (input minus the next crossfade segment).
                 result = torch.cat([powered_result, cur], axis=0)  # Concatenate the crossfade with the raw part of the current input
@@ -517,15 +523,11 @@ class VoiceChanger():
         if self.settings.inputSampleRate != 24000:
             print("convert sampling rate!", self.settings.inputSampleRate)
             unpackedData = resampy.resample(unpackedData, 48000, 24000)
-        convertSize = self.settings.convertChunkNum * 128  # 128 samples / 1 chunk
-        # print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
-        if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
-            convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
-        if convertSize < self.settings.minConvertSize:
-            convertSize = self.settings.minConvertSize
-        # print("convert Size", unpackedData.shape[0], unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate), convertSize, self.settings.minConvertSize)
-        # convertSize = 8192
+        convertSize = unpackedData.shape[0] + min(self.settings.crossFadeOverlapSize, unpackedData.shape[0])
+        print(convertSize, unpackedData.shape[0])
+        if convertSize < 8192:
+            convertSize = 8192
 
         self._generate_strength(unpackedData)
         # f0 is for debugging
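The net effect of the server-side change: the overlap is now a fixed sample count clamped to the input length, and each conversion request is sized as input plus overlap with an 8192-sample floor, instead of being derived from `convertChunkNum` and `crossFadeOverlapRate`. A sketch of the block-stitching arithmetic, in TypeScript standing in for the numpy original (names are illustrative):

```typescript
// Hedged sketch of how each converted block is stitched to the previous one after this diff.
const stitchBlocks = (prevAudio: Float32Array, curAudio: Float32Array, inputSize: number,
    overlapSetting: number, prevStrength: Float32Array, curStrength: Float32Array): Float32Array => {
    const overlapSize = Math.min(overlapSetting, inputSize) // fixed-size overlap, clamped to the input
    const prevOverlap = prevAudio.slice(prevAudio.length - overlapSize) // tail of the previous block
    const curOverlap = curAudio.slice(curAudio.length - inputSize - overlapSize, curAudio.length - inputSize)
    const out = new Float32Array(inputSize)
    for (let i = 0; i < overlapSize; i++) {
        out[i] = prevOverlap[i] * prevStrength[i] + curOverlap[i] * curStrength[i] // overlap-add crossfade
    }
    // Raw part of the current input (input minus the segment reserved for the next crossfade).
    out.set(curAudio.slice(curAudio.length - inputSize, curAudio.length - overlapSize), overlapSize)
    return out
}

// Conversion size: input plus overlap, floored at 8192 samples (per the new server code).
const convertSize = (inputSize: number, overlapSetting: number): number =>
    Math.max(inputSize + Math.min(overlapSetting, inputSize), 8192)

console.log(convertSize(5120, 4096)) // 9216
console.log(convertSize(2048, 1024)) // 8192 (floor applies)
```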