mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 13:35:12 +03:00
WIP: refactoring, change overlaprate to overlapsize
This commit is contained in:
parent
6b78c57204
commit
096ada6e3d
@ -1,4 +1,4 @@
|
||||
import { BufferSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
||||
import { BufferSize, CrossFadeOverlapSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
|
||||
import React, { useMemo } from "react"
|
||||
import { useAppState } from "./001_provider/001_AppStateProvider";
|
||||
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
|
||||
@ -164,6 +164,26 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
|
||||
)
|
||||
}, [appState.serverSetting.setting.crossFadeOverlapRate, appState.serverSetting.setCrossFadeOverlapRate])
|
||||
|
||||
|
||||
// Advanced-settings row with a <select> for the cross-fade overlap size.
// The options are the values of CrossFadeOverlapSize; the selected value is
// read from appState.serverSetting.setting.crossFadeOverlapSize.
// Memoized: rebuilt only when that value or its setter changes.
const crossFadeOverlapSizeRow = useMemo(() => {
    return (
        <div className="body-row split-3-7 left-padding-1 guided">
            <div className="body-item-title left-padding-1">Cross Fade Overlap Size</div>
            <div className="body-select-container">
                <select className="body-select" value={appState.serverSetting.setting.crossFadeOverlapSize} onChange={(e) => {
                    // The DOM reports the selected value as a string; convert it back to a number before storing.
                    appState.serverSetting.setCrossFadeOverlapSize(Number(e.target.value) as CrossFadeOverlapSize)
                }}>
                    {
                        Object.values(CrossFadeOverlapSize).map(x => {
                            return <option key={x} value={x}>{x}</option>
                        })
                    }
                </select>
            </div>
        </div>
    )
}, [appState.serverSetting.setting.crossFadeOverlapSize, appState.serverSetting.setCrossFadeOverlapSize])
|
||||
|
||||
const crossFadeOffsetRateRow = useMemo(() => {
|
||||
return (
|
||||
<div className="body-row split-3-7 left-padding-1 guided">
|
||||
@ -290,6 +310,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
|
||||
{convertChunkNumRow}
|
||||
{minConvertSizeRow}
|
||||
{crossFadeOverlapRateRow}
|
||||
{crossFadeOverlapSizeRow}
|
||||
{crossFadeOffsetRateRow}
|
||||
{crossFadeEndRateRow}
|
||||
<div className="body-row divider"></div>
|
||||
@ -301,7 +322,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
|
||||
|
||||
</>
|
||||
)
|
||||
}, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
|
||||
}, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOverlapSizeRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
|
||||
|
||||
|
||||
const advancedSetting = useMemo(() => {
|
||||
|
@ -259,9 +259,14 @@ export class VoiceChangerClient {
|
||||
this.audioStreamer.setSendingSampleRate(val)
|
||||
}
|
||||
|
||||
// configure worklet
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// コンポーネント設定、操作
|
||||
/////////////////////////////////////////////////////
|
||||
//## Server ##//
|
||||
|
||||
//## Worklet ##//
|
||||
// Logs the given worklet setting and passes it to this.vcNode.configure().
configureWorklet = (setting: WorkletSetting) => {
    console.log("configureWorklet", setting)
    this.vcNode.configure(setting)
}
|
||||
startOutputRecordingWorklet = () => {
|
||||
|
@ -7,7 +7,10 @@
|
||||
|
||||
// types
|
||||
export type VoiceChangerServerSetting = {
|
||||
convertChunkNum: number, // VITSに入力する変換サイズ。(入力データの2倍以上の大きさで指定。それより小さいものが指定された場合は、サーバ側で自動的に入力の2倍のサイズが設定される。)
|
||||
|
||||
// VITSに入力する変換サイズ。(入力データの2倍以上の大きさで指定。それより小さいものが指定された場合は、
|
||||
// サーバ側で自動的に入力の2倍のサイズが設定される。)
|
||||
convertChunkNum: number,
|
||||
minConvertSize: number, // この値より小さい場合にこの値に揃える。
|
||||
|
||||
srcId: number,
|
||||
@ -18,6 +21,7 @@ export type VoiceChangerServerSetting = {
|
||||
crossFadeOffsetRate: number,
|
||||
crossFadeEndRate: number,
|
||||
crossFadeOverlapRate: number,
|
||||
crossFadeOverlapSize: number,
|
||||
|
||||
framework: Framework
|
||||
onnxExecutionProvider: OnnxExecutionProvider,
|
||||
@ -76,6 +80,7 @@ export type ServerInfo = {
|
||||
crossFadeOffsetRate: number,
|
||||
crossFadeEndRate: number,
|
||||
crossFadeOverlapRate: number,
|
||||
crossFadeOverlapSize: number,
|
||||
gpu: number,
|
||||
srcId: number,
|
||||
dstId: number,
|
||||
@ -160,15 +165,22 @@ export type OnnxExecutionProvider = typeof OnnxExecutionProvider[keyof typeof On
|
||||
// Identifiers usable as the `framework` server setting.
// `as const` keeps the values as string literals so the derived `Framework`
// type is the union "PyTorch" | "ONNX" rather than plain `string`.
export const Framework = {
    "PyTorch": "PyTorch",
    "ONNX": "ONNX",
} as const
export type Framework = typeof Framework[keyof typeof Framework]
|
||||
|
||||
// Identifiers usable as the `f0Detector` server setting.
// `as const` keeps the values as string literals so the derived `F0Detector`
// type is the union "dio" | "harvest" rather than plain `string`.
export const F0Detector = {
    "dio": "dio",
    "harvest": "harvest",
} as const
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
|
||||
|
||||
// Selectable cross-fade overlap sizes.
// `as const` keeps the values as numeric literals so the derived
// `CrossFadeOverlapSize` type is the union 1024 | 2048 | 4096.
export const CrossFadeOverlapSize = {
    "1024": 1024,
    "2048": 2048,
    "4096": 4096,
} as const
export type CrossFadeOverlapSize = typeof CrossFadeOverlapSize[keyof typeof CrossFadeOverlapSize]
|
||||
|
||||
export const ServerSettingKey = {
|
||||
"srcId": "srcId",
|
||||
"dstId": "dstId",
|
||||
@ -178,6 +190,7 @@ export const ServerSettingKey = {
|
||||
"crossFadeOffsetRate": "crossFadeOffsetRate",
|
||||
"crossFadeEndRate": "crossFadeEndRate",
|
||||
"crossFadeOverlapRate": "crossFadeOverlapRate",
|
||||
"crossFadeOverlapSize": "crossFadeOverlapSize",
|
||||
"framework": "framework",
|
||||
"onnxExecutionProvider": "onnxExecutionProvider",
|
||||
"f0Factor": "f0Factor",
|
||||
@ -199,6 +212,7 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
|
||||
crossFadeOffsetRate: 0.1,
|
||||
crossFadeEndRate: 0.9,
|
||||
crossFadeOverlapRate: 0.5,
|
||||
crossFadeOverlapSize: CrossFadeOverlapSize[4096],
|
||||
framework: "PyTorch",
|
||||
f0Factor: 1.0,
|
||||
onnxExecutionProvider: "CPUExecutionProvider",
|
||||
|
@ -107,7 +107,7 @@ export const useClient = (props: UseClientProps): ClientState => {
|
||||
// Memoized async callback: awaits initializedPromise, then reloads the
// client setting and the server info, in that order.
const getInfo = useMemo(() => {
    return async () => {
        await initializedPromise
        await clientSetting.reloadClientSetting() // has no practical effect
        await serverSetting.reloadServerInfo()
    }
}, [clientSetting, serverSetting])
|
||||
|
@ -48,6 +48,7 @@ export type ServerSettingState = {
|
||||
setCrossFadeOffsetRate: (num: number) => Promise<boolean>;
|
||||
setCrossFadeEndRate: (num: number) => Promise<boolean>;
|
||||
setCrossFadeOverlapRate: (num: number) => Promise<boolean>;
|
||||
setCrossFadeOverlapSize: (num: number) => Promise<boolean>;
|
||||
setF0Factor: (num: number) => Promise<boolean>;
|
||||
setF0Detector: (val: string) => Promise<boolean>;
|
||||
setRecordIO: (num: number) => Promise<boolean>;
|
||||
@ -69,7 +70,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
const { setItem, getItem, removeItem } = useIndexedDB()
|
||||
|
||||
|
||||
// 初期化 その1 DBから取得
|
||||
// DBから設定取得(キャッシュによる初期化)
|
||||
useEffect(() => {
|
||||
const loadCache = async () => {
|
||||
const setting = await getItem(INDEXEDDB_KEY_SERVER)
|
||||
@ -88,7 +89,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
|
||||
loadCache()
|
||||
}, [])
|
||||
// 初期化 その2 クライアントに設定
|
||||
|
||||
// クライアントへ設定反映 初期化, 設定変更
|
||||
useEffect(() => {
|
||||
if (!props.voiceChangerClient) return
|
||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.framework, setting.framework)
|
||||
@ -106,7 +108,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
props.voiceChangerClient.updateServerSettings(ServerSettingKey.recordIO, "" + setting.recordIO)
|
||||
|
||||
|
||||
}, [props.voiceChangerClient])
|
||||
// setting["convertChunkNum"] = 1
|
||||
// const a = "convertChunkNum"
|
||||
// setting[a] = ""
|
||||
|
||||
}, [props.voiceChangerClient, setting])
|
||||
|
||||
//////////////
|
||||
// 設定
|
||||
@ -129,6 +135,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
crossFadeOffsetRate: res.crossFadeOffsetRate,
|
||||
crossFadeEndRate: res.crossFadeEndRate,
|
||||
crossFadeOverlapRate: res.crossFadeOverlapRate,
|
||||
crossFadeOverlapSize: res.crossFadeOverlapSize,
|
||||
framework: res.framework,
|
||||
onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
||||
f0Factor: res.f0Factor,
|
||||
@ -146,6 +153,47 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
|
||||
}
|
||||
|
||||
// // New Trial
|
||||
// // 設定 _setSettingがトリガでuseEffectが呼ばれて、workletに設定が飛ぶ
|
||||
// const setSetting = useMemo(() => {
|
||||
// return (setting: ) => {
|
||||
|
||||
|
||||
// if (!props.voiceChangerClient) return false
|
||||
|
||||
// const res = await props.voiceChangerClient.updateServerSettings(key, "" + newVal)
|
||||
|
||||
// _setServerInfo(res)
|
||||
// if (newVal == res[key]) {
|
||||
// const newSetting: VoiceChangerServerSetting = {
|
||||
// ...settingRef.current,
|
||||
// convertChunkNum: res.convertChunkNum,
|
||||
// minConvertSize: res.minConvertSize,
|
||||
// srcId: res.srcId,
|
||||
// dstId: res.dstId,
|
||||
// gpu: res.gpu,
|
||||
// crossFadeOffsetRate: res.crossFadeOffsetRate,
|
||||
// crossFadeEndRate: res.crossFadeEndRate,
|
||||
// crossFadeOverlapRate: res.crossFadeOverlapRate,
|
||||
// crossFadeOverlapSize: res.crossFadeOverlapSize,
|
||||
// framework: res.framework,
|
||||
// onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider,
|
||||
// f0Factor: res.f0Factor,
|
||||
// f0Detector: res.f0Detector,
|
||||
// recordIO: res.recordIO
|
||||
|
||||
// }
|
||||
// _setSetting(newSetting)
|
||||
// setItem(INDEXEDDB_KEY_SERVER, newSetting)
|
||||
// return true
|
||||
// } else {
|
||||
// alert(`[ServerSetting] 設定が反映されていません([key:${key}, new:${newVal}, res:${res[key]}])。モデルの切り替えの場合、処理が非同期で行われるため反映されていないように見える場合があります。サーバコントロールのリロードボタンを押すとGUIに反映されるます。`)
|
||||
// return false
|
||||
// }
|
||||
|
||||
// }
|
||||
// }, [props.voiceChangerClient])
|
||||
|
||||
const setFramework = useMemo(() => {
|
||||
return async (framework: Framework) => {
|
||||
return await _set_and_store(ServerSettingKey.framework, "" + framework)
|
||||
@ -204,6 +252,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
return await _set_and_store(ServerSettingKey.crossFadeOverlapRate, "" + num)
|
||||
}
|
||||
}, [props.voiceChangerClient])
|
||||
// Memoized setter for the cross-fade overlap size. The number is converted
// to a string and handed to _set_and_store under the crossFadeOverlapSize
// key; the returned promise resolves to whatever _set_and_store returns.
const setCrossFadeOverlapSize = useMemo(() => {
    return async (num: number) => {
        return await _set_and_store(ServerSettingKey.crossFadeOverlapSize, "" + num)
    }
}, [props.voiceChangerClient])
|
||||
|
||||
|
||||
const setF0Factor = useMemo(() => {
|
||||
return async (num: number) => {
|
||||
@ -370,6 +424,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
setCrossFadeOffsetRate,
|
||||
setCrossFadeEndRate,
|
||||
setCrossFadeOverlapRate,
|
||||
setCrossFadeOverlapSize,
|
||||
setF0Factor,
|
||||
setF0Detector,
|
||||
setRecordIO,
|
||||
|
@ -18,7 +18,7 @@ export type WorkletSettingState = {
|
||||
export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSettingState => {
|
||||
const [setting, _setSetting] = useState<WorkletSetting>(DefaultWorkletSetting)
|
||||
const { setItem, getItem, removeItem } = useIndexedDB()
|
||||
// 初期化 その1 DBから取得
|
||||
// DBから設定取得(キャッシュによる初期化)
|
||||
useEffect(() => {
|
||||
const loadCache = async () => {
|
||||
const setting = await getItem(INDEXEDDB_KEY_WORKLET)
|
||||
@ -48,7 +48,7 @@ export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSetting
|
||||
loadCache()
|
||||
}, [])
|
||||
|
||||
// クライアントに設定 初期化, 設定変更
|
||||
// クライアントへ設定反映 初期化, 設定変更
|
||||
useEffect(() => {
|
||||
if (!props.voiceChangerClient) return
|
||||
props.voiceChangerClient.configureWorklet(setting)
|
||||
|
@ -91,7 +91,7 @@ class VocieChangerSettings():
|
||||
dstId: int = 100
|
||||
crossFadeOffsetRate: float = 0.1
|
||||
crossFadeEndRate: float = 0.9
|
||||
crossFadeOverlapRate: float = 0.9
|
||||
crossFadeOverlapSize: int = 4096
|
||||
convertChunkNum: int = 32
|
||||
minConvertSize: int = 0
|
||||
framework: str = "PyTorch" # PyTorch or ONNX
|
||||
@ -106,8 +106,8 @@ class VocieChangerSettings():
|
||||
configFile: str = ""
|
||||
|
||||
# ↓mutableな物だけ列挙
|
||||
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate"]
|
||||
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
|
||||
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate", "crossFadeOverlapSize"]
|
||||
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "f0Factor"]
|
||||
strData = ["framework", "f0Detector", "serverMicProps"]
|
||||
|
||||
|
||||
@ -125,7 +125,7 @@ class VoiceChanger():
|
||||
self.onnx_session = None
|
||||
self.currentCrossFadeOffsetRate = 0
|
||||
self.currentCrossFadeEndRate = 0
|
||||
self.currentCrossFadeOverlapRate = 0
|
||||
self.currentCrossFadeOverlapSize = 0
|
||||
|
||||
self.gpu_num = torch.cuda.device_count()
|
||||
self.text_norm = torch.LongTensor([0, 6, 0])
|
||||
@ -324,14 +324,17 @@ class VoiceChanger():
|
||||
|
||||
def _generate_strength(self, unpackedData):
|
||||
|
||||
if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapRate != self.settings.crossFadeOverlapRate:
|
||||
if self.unpackedData_length != unpackedData.shape[0] or \
|
||||
self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
|
||||
self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
|
||||
self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
|
||||
|
||||
self.unpackedData_length = unpackedData.shape[0]
|
||||
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
|
||||
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
|
||||
self.currentCrossFadeOverlapRate = self.settings.crossFadeOverlapRate
|
||||
|
||||
overlapSize = int(unpackedData.shape[0] * self.settings.crossFadeOverlapRate)
|
||||
self.currentCrossFadeOverlapSize = self.settings.crossFadeOverlapSize
|
||||
|
||||
overlapSize = min(self.settings.crossFadeOverlapSize, self.unpackedData_length)
|
||||
cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate)
|
||||
cf_end = int(overlapSize * self.settings.crossFadeEndRate)
|
||||
cf_range = cf_end - cf_offset
|
||||
@ -413,7 +416,7 @@ class VoiceChanger():
|
||||
"sid_tgt": sid_tgt1.numpy()
|
||||
})[0][0, 0] * self.hps.data.max_wav_value
|
||||
if hasattr(self, 'np_prev_audio1') == True:
|
||||
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
|
||||
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
|
||||
prev_overlap = self.np_prev_audio1[-1 * overlapSize:]
|
||||
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
|
||||
# print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
|
||||
@ -454,7 +457,7 @@ class VoiceChanger():
|
||||
self.cur_strength = self.cur_strength.cpu()
|
||||
|
||||
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'): # prev_audio1が所望のデバイスに無い場合は一回休み。
|
||||
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
|
||||
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
|
||||
prev_overlap = self.prev_audio1[-1 * overlapSize:]
|
||||
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
|
||||
powered_prev = prev_overlap * self.prev_strength
|
||||
@ -495,13 +498,16 @@ class VoiceChanger():
|
||||
self.cur_strength = self.cur_strength.cuda(self.settings.gpu)
|
||||
|
||||
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.settings.gpu):
|
||||
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
|
||||
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
|
||||
prev_overlap = self.prev_audio1[-1 * overlapSize:]
|
||||
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
|
||||
powered_prev = prev_overlap * self.prev_strength
|
||||
powered_cur = cur_overlap * self.cur_strength
|
||||
powered_result = powered_prev + powered_cur
|
||||
|
||||
print(overlapSize, prev_overlap.shape, cur_overlap.shape, self.prev_strength.shape, self.cur_strength.shape)
|
||||
print(self.prev_audio1.shape, audio1.shape, inputSize, overlapSize)
|
||||
|
||||
cur = audio1[-1 * inputSize:-1 * overlapSize] # 今回のインプットの生部分。(インプット - 次回のCrossfade部分)。
|
||||
result = torch.cat([powered_result, cur], axis=0) # Crossfadeと今回のインプットの生部分を結合
|
||||
|
||||
@ -517,15 +523,11 @@ class VoiceChanger():
|
||||
if self.settings.inputSampleRate != 24000:
|
||||
print("convert sampling rate!", self.settings.inputSampleRate)
|
||||
unpackedData = resampy.resample(unpackedData, 48000, 24000)
|
||||
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
|
||||
# print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
|
||||
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
|
||||
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024
|
||||
if convertSize < self.settings.minConvertSize:
|
||||
convertSize = self.settings.minConvertSize
|
||||
# print("convert Size", unpackedData.shape[0], unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate), convertSize, self.settings.minConvertSize)
|
||||
|
||||
# convertSize = 8192
|
||||
convertSize = unpackedData.shape[0] + min(self.settings.crossFadeOverlapSize, unpackedData.shape[0])
|
||||
print(convertSize, unpackedData.shape[0])
|
||||
if convertSize < 8192:
|
||||
convertSize = 8192
|
||||
|
||||
self._generate_strength(unpackedData)
|
||||
# f0はデバッグ用
|
||||
|
Loading…
Reference in New Issue
Block a user