overlap control

This commit is contained in:
wataru 2023-01-12 01:05:38 +09:00
parent 6ecd5f0236
commit 3a04b7cd1f
8 changed files with 73 additions and 25 deletions

File diff suppressed because one or more lines are too long

View File

@ -84,6 +84,22 @@ export const useConvertSetting = (props: UseConvertSettingProps): ConvertSetting
) )
}, [props.clientState.settingState]) }, [props.clientState.settingState])
// Settings row that edits crossFadeOverlapRate in the shared setting state.
const crossFadeOverlapRateRow = useMemo(() => {
    // Bounds shared by the <input> attributes and the onChange guard below.
    const minRate = 0.1
    const maxRate = 1
    return (
        <div className="body-row split-3-7 left-padding-1 guided">
            <div className="body-item-title left-padding-1">Cross Fade Overlap Rate</div>
            <div className="body-input-container">
                <input type="number" min={minRate} max={maxRate} step={0.1} value={props.clientState.settingState.crossFadeOverlapRate} onChange={(e) => {
                    const requested = Number(e.target.value)
                    // Keep the current setting instead of storing NaN/Infinity.
                    if (!Number.isFinite(requested)) {
                        return
                    }
                    // min/max do not block typed or cleared input ("" converts to 0),
                    // so clamp into the supported range before storing the value.
                    props.clientState.setSettingState({
                        ...props.clientState.settingState,
                        crossFadeOverlapRate: Math.min(maxRate, Math.max(minRate, requested))
                    })
                }} />
            </div>
        </div>
    )
}, [props.clientState.settingState])
const crossFadeOffsetRateRow = useMemo(() => { const crossFadeOffsetRateRow = useMemo(() => {
return ( return (
<div className="body-row split-3-7 left-padding-1 guided"> <div className="body-row split-3-7 left-padding-1 guided">
@ -128,11 +144,12 @@ export const useConvertSetting = (props: UseConvertSettingProps): ConvertSetting
{inputChunkNumRow} {inputChunkNumRow}
{convertChunkNumRow} {convertChunkNumRow}
{gpuRow} {gpuRow}
{crossFadeOverlapRateRow}
{crossFadeOffsetRateRow} {crossFadeOffsetRateRow}
{crossFadeEndRateRow} {crossFadeEndRateRow}
</> </>
) )
}, [bufferSizeRow, inputChunkNumRow, convertChunkNumRow, gpuRow, crossFadeOffsetRateRow, crossFadeEndRateRow]) }, [bufferSizeRow, inputChunkNumRow, convertChunkNumRow, gpuRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow])
return { return {
convertSetting, convertSetting,

View File

@ -34,6 +34,7 @@ export type SettingState = {
gpu: number gpu: number
crossFadeOffsetRate: number crossFadeOffsetRate: number
crossFadeEndRate: number crossFadeEndRate: number
crossFadeOverlapRate: number
// advanced setting // advanced setting
vfForceDisabled: boolean vfForceDisabled: boolean
@ -64,6 +65,8 @@ const InitialSettingState: SettingState = {
gpu: DefaultVoiceChangerRequestParamas.gpu, gpu: DefaultVoiceChangerRequestParamas.gpu,
crossFadeOffsetRate: DefaultVoiceChangerRequestParamas.crossFadeOffsetRate, crossFadeOffsetRate: DefaultVoiceChangerRequestParamas.crossFadeOffsetRate,
crossFadeEndRate: DefaultVoiceChangerRequestParamas.crossFadeEndRate, crossFadeEndRate: DefaultVoiceChangerRequestParamas.crossFadeEndRate,
crossFadeOverlapRate: DefaultVoiceChangerRequestParamas.crossFadeOverlapRate,
vfForceDisabled: DefaultVoiceChangerOptions.forceVfDisable, vfForceDisabled: DefaultVoiceChangerOptions.forceVfDisable,
voiceChangerMode: DefaultVoiceChangerOptions.voiceChangerMode voiceChangerMode: DefaultVoiceChangerOptions.voiceChangerMode
} }
@ -325,6 +328,16 @@ export const useClient = (props: UseClientProps): ClientState => {
})() })()
}, [settingState.crossFadeEndRate]) }, [settingState.crossFadeEndRate])
// (f) crossfade setting 3: send the overlap rate to the server whenever it changes
useEffect(() => {
    const pushOverlapRate = async () => {
        // Wait until client initialization has finished before using the ref.
        await initializedPromise
        const client = voiceChangerClientRef.current!
        const updatedInfo = await client.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, String(settingState.crossFadeOverlapRate))
        // Store the server info returned by the update call.
        setServerInfo(updatedInfo)
    }
    pushOverlapRate()
}, [settingState.crossFadeOverlapRate])
// (2-5) advanced setting // (2-5) advanced setting
//// VFDisableはinput設定で合わせて設定。 //// VFDisableはinput設定で合わせて設定。
// (a) voice changer mode // (a) voice changer mode
@ -394,7 +407,8 @@ export const useClient = (props: UseClientProps): ClientState => {
if (colab == "true") { if (colab == "true") {
setSettingState({ setSettingState({
...settingState, ...settingState,
protocol: "rest" protocol: "rest",
inputChunkNum: 64
}) })
} }
}, []) }, [])

View File

@ -1,12 +1,12 @@
{ {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.5", "version": "1.0.6",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.5", "version": "1.0.6",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@types/readable-stream": "^2.3.15", "@types/readable-stream": "^2.3.15",

View File

@ -1,6 +1,6 @@
{ {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.5", "version": "1.0.6",
"description": "", "description": "",
"main": "dist/index.js", "main": "dist/index.js",
"directories": { "directories": {

View File

@ -13,6 +13,7 @@ export type VoiceChangerRequestParamas = {
crossFadeLowerValue: number, crossFadeLowerValue: number,
crossFadeOffsetRate: number, crossFadeOffsetRate: number,
crossFadeEndRate: number, crossFadeEndRate: number,
crossFadeOverlapRate: number,
} }
@ -106,6 +107,7 @@ export const ServerSettingKey = {
"gpu": "gpu", "gpu": "gpu",
"crossFadeOffsetRate": "crossFadeOffsetRate", "crossFadeOffsetRate": "crossFadeOffsetRate",
"crossFadeEndRate": "crossFadeEndRate", "crossFadeEndRate": "crossFadeEndRate",
"crossFadeOverlapRate": "crossFadeOverlapRate",
"framework": "framework", "framework": "framework",
"onnxExecutionProvider": "onnxExecutionProvider" "onnxExecutionProvider": "onnxExecutionProvider"
} as const } as const
@ -119,7 +121,8 @@ export const DefaultVoiceChangerRequestParamas: VoiceChangerRequestParamas = {
gpu: 0, gpu: 0,
crossFadeLowerValue: 0.1, crossFadeLowerValue: 0.1,
crossFadeOffsetRate: 0.1, crossFadeOffsetRate: 0.1,
crossFadeEndRate: 0.9 crossFadeEndRate: 0.9,
crossFadeOverlapRate: 0.5
} }
export const DefaultVoiceChangerOptions: VoiceChangerOptions = { export const DefaultVoiceChangerOptions: VoiceChangerOptions = {

View File

@ -78,6 +78,9 @@ if __name__ == thisFilename or args.colab == True:
MODEL = args.m if args.m != None else None MODEL = args.m if args.m != None else None
ONNX_MODEL = args.o if args.o != None else None ONNX_MODEL = args.o if args.o != None else None
if args.colab == True:
    # os.environ only accepts str values; assigning the bool True raises TypeError.
    # NOTE(review): "True" mirrors str(True); confirm the exact string any reader of this variable expects.
    os.environ["colab"] = "True"
# if os.getenv("EX_TB_PORT"): # if os.getenv("EX_TB_PORT"):
# EX_TB_PORT = os.environ["EX_TB_PORT"] # EX_TB_PORT = os.environ["EX_TB_PORT"]
# exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT) # exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT)

View File

@ -24,6 +24,7 @@ class VocieChangerSettings():
dstId:int = 100 dstId:int = 100
crossFadeOffsetRate:float = 0.1 crossFadeOffsetRate:float = 0.1
crossFadeEndRate:float = 0.9 crossFadeEndRate:float = 0.9
crossFadeOverlapRate:float = 0.9
convertChunkNum:int = 32 convertChunkNum:int = 32
framework:str = "PyTorch" # PyTorch or ONNX framework:str = "PyTorch" # PyTorch or ONNX
pyTorchModelFile:str = "" pyTorchModelFile:str = ""
@ -31,7 +32,7 @@ class VocieChangerSettings():
configFile:str = "" configFile:str = ""
# ↓mutableな物だけ列挙 # ↓mutableな物だけ列挙
intData = ["gpu","srcId", "dstId", "convertChunkNum"] intData = ["gpu","srcId", "dstId", "convertChunkNum"]
floatData = [ "crossFadeOffsetRate", "crossFadeEndRate",] floatData = [ "crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate"]
strData = ["framework"] strData = ["framework"]
class VoiceChanger(): class VoiceChanger():
@ -44,6 +45,8 @@ class VoiceChanger():
self.onnx_session = None self.onnx_session = None
self.currentCrossFadeOffsetRate=0 self.currentCrossFadeOffsetRate=0
self.currentCrossFadeEndRate=0 self.currentCrossFadeEndRate=0
self.currentCrossFadeOverlapRate=0
# 共通で使用する情報を収集 # 共通で使用する情報を収集
self.hps = utils.get_hparams_from_file(config) self.hps = utils.get_hparams_from_file(config)
self.gpu_num = torch.cuda.device_count() self.gpu_num = torch.cuda.device_count()
@ -133,20 +136,24 @@ class VoiceChanger():
def _generate_strength(self, unpackedData): def _generate_strength(self, unpackedData):
if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate : if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapRate != self.settings.crossFadeOverlapRate:
self.unpackedData_length = unpackedData.shape[0] self.unpackedData_length = unpackedData.shape[0]
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
cf_offset = int(unpackedData.shape[0] * self.settings.crossFadeOffsetRate) self.currentCrossFadeOverlapRate = self.settings.crossFadeOverlapRate
cf_end = int(unpackedData.shape[0] * self.settings.crossFadeEndRate)
overlapSize = int(unpackedData.shape[0] * self.settings.crossFadeOverlapRate)
cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate)
cf_end = int(overlapSize * self.settings.crossFadeEndRate)
cf_range = cf_end - cf_offset cf_range = cf_end - cf_offset
percent = np.arange(cf_range) / cf_range percent = np.arange(cf_range) / cf_range
np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2 np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
np_cur_strength = np.cos((1-percent) * 0.5 * np.pi) ** 2 np_cur_strength = np.cos((1-percent) * 0.5 * np.pi) ** 2
self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(unpackedData.shape[0]-cf_offset-len(np_prev_strength))]) self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(overlapSize - cf_offset - len(np_prev_strength))])
self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(unpackedData.shape[0]-cf_offset-len(np_cur_strength))]) self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(overlapSize - cf_offset - len(np_cur_strength))])
self.prev_strength = torch.FloatTensor(self.np_prev_strength) self.prev_strength = torch.FloatTensor(self.np_prev_strength)
self.cur_strength = torch.FloatTensor(self.np_cur_strength) self.cur_strength = torch.FloatTensor(self.np_cur_strength)
@ -199,16 +206,19 @@ class VoiceChanger():
"sid_tgt": sid_tgt1.numpy() "sid_tgt": sid_tgt1.numpy()
})[0][0,0] * self.hps.data.max_wav_value })[0][0,0] * self.hps.data.max_wav_value
if hasattr(self, 'np_prev_audio1') == True: if hasattr(self, 'np_prev_audio1') == True:
prev = self.np_prev_audio1[-1*inputSize:] overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
cur = audio1[-2*inputSize:-1*inputSize] prev_overlap = self.np_prev_audio1[-1*overlapSize:]
# print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape) cur_overlap = audio1[-1*(inputSize + overlapSize) :-1*inputSize]
powered_prev = prev * self.np_prev_strength # print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
powered_cur = cur * self.np_cur_strength # print(">>>>>>>>>>>", -1*(inputSize + overlapSize) , -1*inputSize)
result = powered_prev + powered_cur powered_prev = prev_overlap * self.np_prev_strength
#result = prev * self.np_prev_strength + cur * self.np_cur_strength powered_cur = cur_overlap * self.np_cur_strength
powered_result = powered_prev + powered_cur
cur = audio1[-1*inputSize:-1*overlapSize]
result = np.concatenate([powered_result, cur],axis=0)
else: else:
cur = audio1[-2*inputSize:-1*inputSize] result = np.zeros(1).astype(np.int16)
result = cur
self.np_prev_audio1 = audio1 self.np_prev_audio1 = audio1
return result return result
@ -274,10 +284,11 @@ class VoiceChanger():
def on_request(self, unpackedData:any): def on_request(self, unpackedData:any):
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
if unpackedData.shape[0] * 2 > convertSize:
convertSize = unpackedData.shape[0] * 2
# print("convert Size", convertChunkNum, convertSize) if unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
convertSize = int(unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate)) + 1024
# print("convert Size", unpackedData.shape[0], unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate), convertSize)
self._generate_strength(unpackedData) self._generate_strength(unpackedData)
data = self._generate_input(unpackedData, convertSize) data = self._generate_input(unpackedData, convertSize)