WIP: refactoring, change overlaprate to overlapsize

wataru 2023-02-19 06:25:22 +09:00
parent 6b78c57204
commit 096ada6e3d
7 changed files with 129 additions and 32 deletions
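
In short: the cross-fade overlap stops being a rate (a fraction of the incoming chunk, so the fade length changed whenever the chunk size changed) and becomes an absolute sample count (1024/2048/4096), clamped to the chunk length. A minimal TypeScript sketch of the before/after rule; the variable names here are illustrative, not taken from the diff:

// Before: the overlap scaled with the chunk size.
const overlapBefore = Math.floor(inputSize * crossFadeOverlapRate)
// After: the overlap is a fixed sample count, clamped to the chunk.
const overlapAfter = Math.min(crossFadeOverlapSize, inputSize)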

View File

@@ -1,4 +1,4 @@
import { BufferSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import { BufferSize, CrossFadeOverlapSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import React, { useMemo } from "react"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
@@ -164,6 +164,26 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
)
}, [appState.serverSetting.setting.crossFadeOverlapRate, appState.serverSetting.setCrossFadeOverlapRate])
const crossFadeOverlapSizeRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Cross Fade Overlap Size</div>
<div className="body-select-container">
<select className="body-select" value={appState.serverSetting.setting.crossFadeOverlapSize} onChange={(e) => {
appState.serverSetting.setCrossFadeOverlapSize(Number(e.target.value) as CrossFadeOverlapSize)
}}>
{
Object.values(CrossFadeOverlapSize).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.serverSetting.setting.crossFadeOverlapSize, appState.serverSetting.setCrossFadeOverlapSize])
const crossFadeOffsetRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
@@ -290,6 +310,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
{convertChunkNumRow}
{minConvertSizeRow}
{crossFadeOverlapRateRow}
{crossFadeOverlapSizeRow}
{crossFadeOffsetRateRow}
{crossFadeEndRateRow}
<div className="body-row divider"></div>
@@ -301,7 +322,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
</>
)
}, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
}, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOverlapSizeRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
const advancedSetting = useMemo(() => {

View File

@@ -259,9 +259,14 @@ export class VoiceChangerClient {
this.audioStreamer.setSendingSampleRate(val)
}
// configure worklet
/////////////////////////////////////////////////////
// Component settings and operations
/////////////////////////////////////////////////////
//## Server ##//
//## Worklet ##//
configureWorklet = (setting: WorkletSetting) => {
console.log("configureWorklet", setting)
this.vcNode.configure(setting)
}
startOutputRecordingWorklet = () => {

View File

@@ -7,7 +7,10 @@
// types
export type VoiceChangerServerSetting = {
convertChunkNum: number, // Conversion size fed to VITS. (Specify at least twice the input size; if a smaller value is given, the server automatically uses twice the input size.)
// Conversion size fed to VITS. (Specify at least twice the input size; if a smaller value is given,
// the server automatically uses twice the input size.)
convertChunkNum: number,
minConvertSize: number, // If the size is smaller than this value, it is raised to this value.
srcId: number,
@@ -18,6 +21,7 @@ export type VoiceChangerServerSetting = {
crossFadeOffsetRate: number,
crossFadeEndRate: number,
crossFadeOverlapRate: number,
crossFadeOverlapSize: number,
framework: Framework
onnxExecutionProvider: OnnxExecutionProvider,
@@ -76,6 +80,7 @@ export type ServerInfo = {
crossFadeOffsetRate: number,
crossFadeEndRate: number,
crossFadeOverlapRate: number,
crossFadeOverlapSize: number,
gpu: number,
srcId: number,
dstId: number,
@@ -160,15 +165,22 @@ export type OnnxExecutionProvider = typeof OnnxExecutionProvider[keyof typeof On
export const Framework = {
"PyTorch": "PyTorch",
"ONNX": "ONNX",
}
} as const
export type Framework = typeof Framework[keyof typeof Framework]
export const F0Detector = {
"dio": "dio",
"harvest": "harvest",
}
} as const
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
export const CrossFadeOverlapSize = {
"1024": 1024,
"2048": 2048,
"4096": 4096,
} as const
export type CrossFadeOverlapSize = typeof CrossFadeOverlapSize[keyof typeof CrossFadeOverlapSize]
export const ServerSettingKey = {
"srcId": "srcId",
"dstId": "dstId",
@@ -178,6 +190,7 @@ export const ServerSettingKey = {
"crossFadeOffsetRate": "crossFadeOffsetRate",
"crossFadeEndRate": "crossFadeEndRate",
"crossFadeOverlapRate": "crossFadeOverlapRate",
"crossFadeOverlapSize": "crossFadeOverlapSize",
"framework": "framework",
"onnxExecutionProvider": "onnxExecutionProvider",
"f0Factor": "f0Factor",
@@ -199,6 +212,7 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
crossFadeOffsetRate: 0.1,
crossFadeEndRate: 0.9,
crossFadeOverlapRate: 0.5,
crossFadeOverlapSize: CrossFadeOverlapSize[4096],
framework: "PyTorch",
f0Factor: 1.0,
onnxExecutionProvider: "CPUExecutionProvider",
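
A side note on the "as const" markers added in this file: without "as const", TypeScript widens the object values to plain string/number, and the derived typeof X[keyof typeof X] type collapses to that widened type instead of a literal union. A small illustrative sketch (not part of the commit):

const Loose = { "PyTorch": "PyTorch", "ONNX": "ONNX" }
type LooseT = typeof Loose[keyof typeof Loose]            // string
const Strict = { "PyTorch": "PyTorch", "ONNX": "ONNX" } as const
type StrictT = typeof Strict[keyof typeof Strict]         // "PyTorch" | "ONNX"
// Likewise, CrossFadeOverlapSize[4096] above is typed as the literal 4096, not number.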

View File

@@ -107,7 +107,7 @@ export const useClient = (props: UseClientProps): ClientState => {
const getInfo = useMemo(() => {
return async () => {
await initializedPromise
await clientSetting.reloadClientSetting()
await clientSetting.reloadClientSetting() // effectively has no real effect
await serverSetting.reloadServerInfo()
}
}, [clientSetting, serverSetting])

View File

@@ -48,6 +48,7 @@ export type ServerSettingState = {
setCrossFadeOffsetRate: (num: number) => Promise<boolean>;
setCrossFadeEndRate: (num: number) => Promise<boolean>;
setCrossFadeOverlapRate: (num: number) => Promise<boolean>;
setCrossFadeOverlapSize: (num: number) => Promise<boolean>;
setF0Factor: (num: number) => Promise<boolean>;
setF0Detector: (val: string) => Promise<boolean>;
setRecordIO: (num: number) => Promise<boolean>;
@@ -69,7 +70,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
const { setItem, getItem, removeItem } = useIndexedDB()
// Initialization part 1: fetch from the DB
// Fetch settings from the DB (initialization from cache)
useEffect(() => {
const loadCache = async () => {
const setting = await getItem(INDEXEDDB_KEY_SERVER)
@@ -88,7 +89,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
loadCache()
}, [])
// Initialization part 2: set on the client
// Apply settings to the client: initialization, setting changes
useEffect(() => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.updateServerSettings(ServerSettingKey.framework, setting.framework)
@@ -106,7 +108,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
props.voiceChangerClient.updateServerSettings(ServerSettingKey.recordIO, "" + setting.recordIO)
}, [props.voiceChangerClient])
// setting["convertChunkNum"] = 1
// const a = "convertChunkNum"
// setting[a] = ""
}, [props.voiceChangerClient, setting])
//////////////
// Settings
@@ -129,6 +135,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
crossFadeOffsetRate: res.crossFadeOffsetRate,
crossFadeEndRate: res.crossFadeEndRate,
crossFadeOverlapRate: res.crossFadeOverlapRate,
crossFadeOverlapSize: res.crossFadeOverlapSize,
framework: res.framework,
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
f0Factor: res.f0Factor,
@@ -146,6 +153,47 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
}
// // New Trial
// // Settings: _setSetting triggers useEffect, which sends the settings to the worklet
// const setSetting = useMemo(() => {
// return (setting: ) => {
// if (!props.voiceChangerClient) return false
// const res = await props.voiceChangerClient.updateServerSettings(key, "" + newVal)
// _setServerInfo(res)
// if (newVal == res[key]) {
// const newSetting: VoiceChangerServerSetting = {
// ...settingRef.current,
// convertChunkNum: res.convertChunkNum,
// minConvertSize: res.minConvertSize,
// srcId: res.srcId,
// dstId: res.dstId,
// gpu: res.gpu,
// crossFadeOffsetRate: res.crossFadeOffsetRate,
// crossFadeEndRate: res.crossFadeEndRate,
// crossFadeOverlapRate: res.crossFadeOverlapRate,
// crossFadeOverlapSize: res.crossFadeOverlapSize,
// framework: res.framework,
// onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
// f0Factor: res.f0Factor,
// f0Detector: res.f0Detector,
// recordIO: res.recordIO
// }
// _setSetting(newSetting)
// setItem(INDEXEDDB_KEY_SERVER, newSetting)
// return true
// } else {
// alert(`[ServerSetting] The setting was not applied ([key:${key}, new:${newVal}, res:${res[key]}]). When switching models, processing is asynchronous, so it may only look unapplied; press the reload button in the server control to reflect it in the GUI.`)
// return false
// }
// }
// }, [props.voiceChangerClient])
const setFramework = useMemo(() => {
return async (framework: Framework) => {
return await _set_and_store(ServerSettingKey.framework, "" + framework)
@@ -204,6 +252,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
return await _set_and_store(ServerSettingKey.crossFadeOverlapRate, "" + num)
}
}, [props.voiceChangerClient])
const setCrossFadeOverlapSize = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.crossFadeOverlapSize, "" + num)
}
}, [props.voiceChangerClient])
const setF0Factor = useMemo(() => {
return async (num: number) => {
@@ -370,6 +424,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
setCrossFadeOffsetRate,
setCrossFadeEndRate,
setCrossFadeOverlapRate,
setCrossFadeOverlapSize,
setF0Factor,
setF0Detector,
setRecordIO,
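
_set_and_store itself does not appear in this diff; judging from the commented-out trial above, each setter sends one key/value pair to the server, accepts the change only if the server echoes the value back, and then persists the merged setting. A hypothetical reconstruction, inferred only from that commented code (names, types, and error handling are assumptions):

// Hypothetical sketch of _set_and_store; not the actual implementation.
const _set_and_store = async (key: string, newVal: string): Promise<boolean> => {
    if (!props.voiceChangerClient) return false
    const res: any = await props.voiceChangerClient.updateServerSettings(key, newVal)
    if ("" + res[key] !== newVal) return false        // server rejected or deferred the change
    const newSetting = { ...settingRef.current, [key]: res[key] }
    _setSetting(newSetting)                           // update React state
    setItem(INDEXEDDB_KEY_SERVER, newSetting)         // persist to the IndexedDB cache
    return true
}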

View File

@@ -18,7 +18,7 @@ export type WorkletSettingState = {
export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSettingState => {
const [setting, _setSetting] = useState<WorkletSetting>(DefaultWorkletSetting)
const { setItem, getItem, removeItem } = useIndexedDB()
// Initialization part 1: fetch from the DB
// Fetch settings from the DB (initialization from cache)
useEffect(() => {
const loadCache = async () => {
const setting = await getItem(INDEXEDDB_KEY_WORKLET)
@@ -48,7 +48,7 @@ export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSetting
loadCache()
}, [])
// Set on the client: initialization, setting changes
// Apply settings to the client: initialization, setting changes
useEffect(() => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.configureWorklet(setting)

View File

@@ -91,7 +91,7 @@ class VocieChangerSettings():
dstId: int = 100
crossFadeOffsetRate: float = 0.1
crossFadeEndRate: float = 0.9
crossFadeOverlapRate: float = 0.9
crossFadeOverlapSize: int = 4096
convertChunkNum: int = 32
minConvertSize: int = 0
framework: str = "PyTorch" # PyTorch or ONNX
@@ -106,8 +106,8 @@ class VocieChangerSettings():
configFile: str = ""
# ↓ list only the mutable fields
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate"]
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate", "crossFadeOverlapSize"]
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "f0Factor"]
strData = ["framework", "f0Detector", "serverMicProps"]
@@ -125,7 +125,7 @@ class VoiceChanger():
self.onnx_session = None
self.currentCrossFadeOffsetRate = 0
self.currentCrossFadeEndRate = 0
self.currentCrossFadeOverlapRate = 0
self.currentCrossFadeOverlapSize = 0
self.gpu_num = torch.cuda.device_count()
self.text_norm = torch.LongTensor([0, 6, 0])
@@ -324,14 +324,17 @@ class VoiceChanger():
def _generate_strength(self, unpackedData):
if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapRate != self.settings.crossFadeOverlapRate:
if self.unpackedData_length != unpackedData.shape[0] or \
self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
self.unpackedData_length = unpackedData.shape[0]
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
self.currentCrossFadeOverlapRate = self.settings.crossFadeOverlapRate
overlapSize = int(unpackedData.shape[0] * self.settings.crossFadeOverlapRate)
self.currentCrossFadeOverlapSize = self.settings.crossFadeOverlapSize
overlapSize = min(self.settings.crossFadeOverlapSize, self.unpackedData_length)
cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate)
cf_end = int(overlapSize * self.settings.crossFadeEndRate)
cf_range = cf_end - cf_offset
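
The strength buffers themselves are outside this hunk, but cf_offset, cf_end, and cf_range now delimit the fade window inside a fixed-size overlap rather than a chunk-relative one. As a sketch, an equal-power fade over that window could look like the following; the exact curve used by _generate_strength is an assumption here:

const overlapSize = Math.min(crossFadeOverlapSize, inputSize)
const cfOffset = Math.floor(overlapSize * crossFadeOffsetRate)
const cfEnd = Math.floor(overlapSize * crossFadeEndRate)
const cfRange = cfEnd - cfOffset
// prev fades 1 -> 0 across [cfOffset, cfEnd); cur is its complement
const prevStrength = Float32Array.from({ length: overlapSize }, (_, i) => {
    if (i < cfOffset) return 1
    if (i >= cfEnd) return 0
    return Math.cos(((i - cfOffset) / cfRange) * 0.5 * Math.PI) ** 2
})
const curStrength = prevStrength.map(v => 1 - v)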
@@ -413,7 +416,7 @@ class VoiceChanger():
"sid_tgt": sid_tgt1.numpy()
})[0][0, 0] * self.hps.data.max_wav_value
if hasattr(self, 'np_prev_audio1') == True:
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.np_prev_audio1[-1 * overlapSize:]
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
# print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
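
For orientation, the slicing around overlapSize implements a standard crossfade: fade out the tail of the previous converted chunk, fade in the matching head of the current one, sum them, and append the untouched remainder (which becomes next round's fade-out material). A schematic TypeScript version with assumed names:

const prevOverlap = prevAudio.slice(-overlapSize)
const curOverlap = audio.slice(-(inputSize + overlapSize), -inputSize)
const mixed = prevOverlap.map((p, i) => p * prevStrength[i] + curOverlap[i] * curStrength[i])
const rest = audio.slice(-inputSize, -overlapSize)   // raw part, crossfaded next round
const output = Float32Array.from([...mixed, ...rest])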
@@ -454,7 +457,7 @@ class VoiceChanger():
self.cur_strength = self.cur_strength.cpu()
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'): # if prev_audio1 is not on the desired device, skip this round.
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.prev_audio1[-1 * overlapSize:]
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
powered_prev = prev_overlap * self.prev_strength
@@ -495,13 +498,16 @@ class VoiceChanger():
self.cur_strength = self.cur_strength.cuda(self.settings.gpu)
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.settings.gpu):
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.prev_audio1[-1 * overlapSize:]
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
powered_prev = prev_overlap * self.prev_strength
powered_cur = cur_overlap * self.cur_strength
powered_result = powered_prev + powered_cur
print(overlapSize, prev_overlap.shape, cur_overlap.shape, self.prev_strength.shape, self.cur_strength.shape)
print(self.prev_audio1.shape, audio1.shape, inputSize, overlapSize)
cur = audio1[-1 * inputSize:-1 * overlapSize] # raw part of this input (input minus the next crossfade portion)
result = torch.cat([powered_result, cur], axis=0) # concatenate the crossfade and the raw part of this input
@@ -517,15 +523,11 @@ class VoiceChanger():
if self.settings.inputSampleRate != 24000:
print("convert sampling rate!", self.settings.inputSampleRate)
unpackedData = resampy.resample(unpackedData, 48000, 24000)
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
# print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
if convertSize < self.settings.minConvertSize:
convertSize = self.settings.minConvertSize
# print("convert Size", unpackedData.shape[0], unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate), convertSize, self.settings.minConvertSize)
# convertSize = 8192
convertSize = unpackedData.shape[0] + min(self.settings.crossFadeOverlapSize, unpackedData.shape[0])
print(convertSize, unpackedData.shape[0])
if convertSize < 8192:
convertSize = 8192
self._generate_strength(unpackedData)
# f0 is for debugging only
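
The replaced sizing logic is the core behavioral change in this file: the conversion window is now simply the input length plus the clamped overlap, floored at 8192 samples, and the old convertChunkNum/minConvertSize adjustments are dropped from this path. As a sketch of the new rule:

const overlap = Math.min(crossFadeOverlapSize, inputLength)
let convertSize = inputLength + overlap
if (convertSize < 8192) convertSize = 8192   // hard floor in this WIP commit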