mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00
overlap control
This commit is contained in:
parent
6ecd5f0236
commit
3a04b7cd1f
2
client/demo/dist/index.js
vendored
2
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -84,6 +84,22 @@ export const useConvertSetting = (props: UseConvertSettingProps): ConvertSetting
|
|||||||
)
|
)
|
||||||
}, [props.clientState.settingState])
|
}, [props.clientState.settingState])
|
||||||
|
|
||||||
|
const crossFadeOverlapRateRow = useMemo(() => {
|
||||||
|
return (
|
||||||
|
<div className="body-row split-3-7 left-padding-1 guided">
|
||||||
|
<div className="body-item-title left-padding-1">Cross Fade Overlap Rate</div>
|
||||||
|
<div className="body-input-container">
|
||||||
|
<input type="number" min={0.1} max={1} step={0.1} value={props.clientState.settingState.crossFadeOverlapRate} onChange={(e) => {
|
||||||
|
props.clientState.setSettingState({
|
||||||
|
...props.clientState.settingState,
|
||||||
|
crossFadeOverlapRate: Number(e.target.value)
|
||||||
|
})
|
||||||
|
}} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}, [props.clientState.settingState])
|
||||||
|
|
||||||
const crossFadeOffsetRateRow = useMemo(() => {
|
const crossFadeOffsetRateRow = useMemo(() => {
|
||||||
return (
|
return (
|
||||||
<div className="body-row split-3-7 left-padding-1 guided">
|
<div className="body-row split-3-7 left-padding-1 guided">
|
||||||
@ -128,11 +144,12 @@ export const useConvertSetting = (props: UseConvertSettingProps): ConvertSetting
|
|||||||
{inputChunkNumRow}
|
{inputChunkNumRow}
|
||||||
{convertChunkNumRow}
|
{convertChunkNumRow}
|
||||||
{gpuRow}
|
{gpuRow}
|
||||||
|
{crossFadeOverlapRateRow}
|
||||||
{crossFadeOffsetRateRow}
|
{crossFadeOffsetRateRow}
|
||||||
{crossFadeEndRateRow}
|
{crossFadeEndRateRow}
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
}, [bufferSizeRow, inputChunkNumRow, convertChunkNumRow, gpuRow, crossFadeOffsetRateRow, crossFadeEndRateRow])
|
}, [bufferSizeRow, inputChunkNumRow, convertChunkNumRow, gpuRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
convertSetting,
|
convertSetting,
|
||||||
|
@ -34,6 +34,7 @@ export type SettingState = {
|
|||||||
gpu: number
|
gpu: number
|
||||||
crossFadeOffsetRate: number
|
crossFadeOffsetRate: number
|
||||||
crossFadeEndRate: number
|
crossFadeEndRate: number
|
||||||
|
crossFadeOverlapRate: number
|
||||||
|
|
||||||
// advanced setting
|
// advanced setting
|
||||||
vfForceDisabled: boolean
|
vfForceDisabled: boolean
|
||||||
@ -64,6 +65,8 @@ const InitialSettingState: SettingState = {
|
|||||||
gpu: DefaultVoiceChangerRequestParamas.gpu,
|
gpu: DefaultVoiceChangerRequestParamas.gpu,
|
||||||
crossFadeOffsetRate: DefaultVoiceChangerRequestParamas.crossFadeOffsetRate,
|
crossFadeOffsetRate: DefaultVoiceChangerRequestParamas.crossFadeOffsetRate,
|
||||||
crossFadeEndRate: DefaultVoiceChangerRequestParamas.crossFadeEndRate,
|
crossFadeEndRate: DefaultVoiceChangerRequestParamas.crossFadeEndRate,
|
||||||
|
crossFadeOverlapRate: DefaultVoiceChangerRequestParamas.crossFadeOverlapRate,
|
||||||
|
|
||||||
vfForceDisabled: DefaultVoiceChangerOptions.forceVfDisable,
|
vfForceDisabled: DefaultVoiceChangerOptions.forceVfDisable,
|
||||||
voiceChangerMode: DefaultVoiceChangerOptions.voiceChangerMode
|
voiceChangerMode: DefaultVoiceChangerOptions.voiceChangerMode
|
||||||
}
|
}
|
||||||
@ -325,6 +328,16 @@ export const useClient = (props: UseClientProps): ClientState => {
|
|||||||
})()
|
})()
|
||||||
}, [settingState.crossFadeEndRate])
|
}, [settingState.crossFadeEndRate])
|
||||||
|
|
||||||
|
// (f) crossfade設定3
|
||||||
|
useEffect(() => {
|
||||||
|
(async () => {
|
||||||
|
await initializedPromise
|
||||||
|
const info = await voiceChangerClientRef.current!.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, "" + settingState.crossFadeOverlapRate)
|
||||||
|
setServerInfo(info)
|
||||||
|
})()
|
||||||
|
}, [settingState.crossFadeOverlapRate])
|
||||||
|
|
||||||
|
|
||||||
// (2-5) advanced setting
|
// (2-5) advanced setting
|
||||||
//// VFDisableはinput設定で合わせて設定。
|
//// VFDisableはinput設定で合わせて設定。
|
||||||
// (a) voice changer mode
|
// (a) voice changer mode
|
||||||
@ -394,7 +407,8 @@ export const useClient = (props: UseClientProps): ClientState => {
|
|||||||
if (colab == "true") {
|
if (colab == "true") {
|
||||||
setSettingState({
|
setSettingState({
|
||||||
...settingState,
|
...settingState,
|
||||||
protocol: "rest"
|
protocol: "rest",
|
||||||
|
inputChunkNum: 64
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}, [])
|
}, [])
|
||||||
|
4
client/lib/package-lock.json
generated
4
client/lib/package-lock.json
generated
@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@dannadori/voice-changer-client-js",
|
"name": "@dannadori/voice-changer-client-js",
|
||||||
"version": "1.0.5",
|
"version": "1.0.6",
|
||||||
"lockfileVersion": 2,
|
"lockfileVersion": 2,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@dannadori/voice-changer-client-js",
|
"name": "@dannadori/voice-changer-client-js",
|
||||||
"version": "1.0.5",
|
"version": "1.0.6",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@types/readable-stream": "^2.3.15",
|
"@types/readable-stream": "^2.3.15",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@dannadori/voice-changer-client-js",
|
"name": "@dannadori/voice-changer-client-js",
|
||||||
"version": "1.0.5",
|
"version": "1.0.6",
|
||||||
"description": "",
|
"description": "",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"directories": {
|
"directories": {
|
||||||
|
@ -13,6 +13,7 @@ export type VoiceChangerRequestParamas = {
|
|||||||
crossFadeLowerValue: number,
|
crossFadeLowerValue: number,
|
||||||
crossFadeOffsetRate: number,
|
crossFadeOffsetRate: number,
|
||||||
crossFadeEndRate: number,
|
crossFadeEndRate: number,
|
||||||
|
crossFadeOverlapRate: number,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -106,6 +107,7 @@ export const ServerSettingKey = {
|
|||||||
"gpu": "gpu",
|
"gpu": "gpu",
|
||||||
"crossFadeOffsetRate": "crossFadeOffsetRate",
|
"crossFadeOffsetRate": "crossFadeOffsetRate",
|
||||||
"crossFadeEndRate": "crossFadeEndRate",
|
"crossFadeEndRate": "crossFadeEndRate",
|
||||||
|
"crossFadeOverlapRate": "crossFadeOverlapRate",
|
||||||
"framework": "framework",
|
"framework": "framework",
|
||||||
"onnxExecutionProvider": "onnxExecutionProvider"
|
"onnxExecutionProvider": "onnxExecutionProvider"
|
||||||
} as const
|
} as const
|
||||||
@ -119,7 +121,8 @@ export const DefaultVoiceChangerRequestParamas: VoiceChangerRequestParamas = {
|
|||||||
gpu: 0,
|
gpu: 0,
|
||||||
crossFadeLowerValue: 0.1,
|
crossFadeLowerValue: 0.1,
|
||||||
crossFadeOffsetRate: 0.1,
|
crossFadeOffsetRate: 0.1,
|
||||||
crossFadeEndRate: 0.9
|
crossFadeEndRate: 0.9,
|
||||||
|
crossFadeOverlapRate: 0.5
|
||||||
}
|
}
|
||||||
|
|
||||||
export const DefaultVoiceChangerOptions: VoiceChangerOptions = {
|
export const DefaultVoiceChangerOptions: VoiceChangerOptions = {
|
||||||
|
@ -78,6 +78,9 @@ if __name__ == thisFilename or args.colab == True:
|
|||||||
MODEL = args.m if args.m != None else None
|
MODEL = args.m if args.m != None else None
|
||||||
ONNX_MODEL = args.o if args.o != None else None
|
ONNX_MODEL = args.o if args.o != None else None
|
||||||
|
|
||||||
|
|
||||||
|
if args.colab == True:
|
||||||
|
os.environ["colab"] = True
|
||||||
# if os.getenv("EX_TB_PORT"):
|
# if os.getenv("EX_TB_PORT"):
|
||||||
# EX_TB_PORT = os.environ["EX_TB_PORT"]
|
# EX_TB_PORT = os.environ["EX_TB_PORT"]
|
||||||
# exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT)
|
# exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT)
|
||||||
|
@ -24,6 +24,7 @@ class VocieChangerSettings():
|
|||||||
dstId:int = 100
|
dstId:int = 100
|
||||||
crossFadeOffsetRate:float = 0.1
|
crossFadeOffsetRate:float = 0.1
|
||||||
crossFadeEndRate:float = 0.9
|
crossFadeEndRate:float = 0.9
|
||||||
|
crossFadeOverlapRate:float = 0.9
|
||||||
convertChunkNum:int = 32
|
convertChunkNum:int = 32
|
||||||
framework:str = "PyTorch" # PyTorch or ONNX
|
framework:str = "PyTorch" # PyTorch or ONNX
|
||||||
pyTorchModelFile:str = ""
|
pyTorchModelFile:str = ""
|
||||||
@ -31,7 +32,7 @@ class VocieChangerSettings():
|
|||||||
configFile:str = ""
|
configFile:str = ""
|
||||||
# ↓mutableな物だけ列挙
|
# ↓mutableな物だけ列挙
|
||||||
intData = ["gpu","srcId", "dstId", "convertChunkNum"]
|
intData = ["gpu","srcId", "dstId", "convertChunkNum"]
|
||||||
floatData = [ "crossFadeOffsetRate", "crossFadeEndRate",]
|
floatData = [ "crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate"]
|
||||||
strData = ["framework"]
|
strData = ["framework"]
|
||||||
|
|
||||||
class VoiceChanger():
|
class VoiceChanger():
|
||||||
@ -44,6 +45,8 @@ class VoiceChanger():
|
|||||||
self.onnx_session = None
|
self.onnx_session = None
|
||||||
self.currentCrossFadeOffsetRate=0
|
self.currentCrossFadeOffsetRate=0
|
||||||
self.currentCrossFadeEndRate=0
|
self.currentCrossFadeEndRate=0
|
||||||
|
self.currentCrossFadeOverlapRate=0
|
||||||
|
|
||||||
# 共通で使用する情報を収集
|
# 共通で使用する情報を収集
|
||||||
self.hps = utils.get_hparams_from_file(config)
|
self.hps = utils.get_hparams_from_file(config)
|
||||||
self.gpu_num = torch.cuda.device_count()
|
self.gpu_num = torch.cuda.device_count()
|
||||||
@ -133,20 +136,24 @@ class VoiceChanger():
|
|||||||
|
|
||||||
def _generate_strength(self, unpackedData):
|
def _generate_strength(self, unpackedData):
|
||||||
|
|
||||||
if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate :
|
if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapRate != self.settings.crossFadeOverlapRate:
|
||||||
self.unpackedData_length = unpackedData.shape[0]
|
self.unpackedData_length = unpackedData.shape[0]
|
||||||
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
|
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
|
||||||
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
|
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
|
||||||
cf_offset = int(unpackedData.shape[0] * self.settings.crossFadeOffsetRate)
|
self.currentCrossFadeOverlapRate = self.settings.crossFadeOverlapRate
|
||||||
cf_end = int(unpackedData.shape[0] * self.settings.crossFadeEndRate)
|
|
||||||
|
overlapSize = int(unpackedData.shape[0] * self.settings.crossFadeOverlapRate)
|
||||||
|
|
||||||
|
cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate)
|
||||||
|
cf_end = int(overlapSize * self.settings.crossFadeEndRate)
|
||||||
cf_range = cf_end - cf_offset
|
cf_range = cf_end - cf_offset
|
||||||
percent = np.arange(cf_range) / cf_range
|
percent = np.arange(cf_range) / cf_range
|
||||||
|
|
||||||
np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
|
np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
|
||||||
np_cur_strength = np.cos((1-percent) * 0.5 * np.pi) ** 2
|
np_cur_strength = np.cos((1-percent) * 0.5 * np.pi) ** 2
|
||||||
|
|
||||||
self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(unpackedData.shape[0]-cf_offset-len(np_prev_strength))])
|
self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(overlapSize - cf_offset - len(np_prev_strength))])
|
||||||
self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(unpackedData.shape[0]-cf_offset-len(np_cur_strength))])
|
self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(overlapSize - cf_offset - len(np_cur_strength))])
|
||||||
|
|
||||||
self.prev_strength = torch.FloatTensor(self.np_prev_strength)
|
self.prev_strength = torch.FloatTensor(self.np_prev_strength)
|
||||||
self.cur_strength = torch.FloatTensor(self.np_cur_strength)
|
self.cur_strength = torch.FloatTensor(self.np_cur_strength)
|
||||||
@ -199,16 +206,19 @@ class VoiceChanger():
|
|||||||
"sid_tgt": sid_tgt1.numpy()
|
"sid_tgt": sid_tgt1.numpy()
|
||||||
})[0][0,0] * self.hps.data.max_wav_value
|
})[0][0,0] * self.hps.data.max_wav_value
|
||||||
if hasattr(self, 'np_prev_audio1') == True:
|
if hasattr(self, 'np_prev_audio1') == True:
|
||||||
prev = self.np_prev_audio1[-1*inputSize:]
|
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
|
||||||
cur = audio1[-2*inputSize:-1*inputSize]
|
prev_overlap = self.np_prev_audio1[-1*overlapSize:]
|
||||||
# print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape)
|
cur_overlap = audio1[-1*(inputSize + overlapSize) :-1*inputSize]
|
||||||
powered_prev = prev * self.np_prev_strength
|
# print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
|
||||||
powered_cur = cur * self.np_cur_strength
|
# print(">>>>>>>>>>>", -1*(inputSize + overlapSize) , -1*inputSize)
|
||||||
result = powered_prev + powered_cur
|
powered_prev = prev_overlap * self.np_prev_strength
|
||||||
#result = prev * self.np_prev_strength + cur * self.np_cur_strength
|
powered_cur = cur_overlap * self.np_cur_strength
|
||||||
|
powered_result = powered_prev + powered_cur
|
||||||
|
|
||||||
|
cur = audio1[-1*inputSize:-1*overlapSize]
|
||||||
|
result = np.concatenate([powered_result, cur],axis=0)
|
||||||
else:
|
else:
|
||||||
cur = audio1[-2*inputSize:-1*inputSize]
|
result = np.zeros(1).astype(np.int16)
|
||||||
result = cur
|
|
||||||
self.np_prev_audio1 = audio1
|
self.np_prev_audio1 = audio1
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@ -274,10 +284,11 @@ class VoiceChanger():
|
|||||||
|
|
||||||
def on_request(self, unpackedData:any):
|
def on_request(self, unpackedData:any):
|
||||||
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
|
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
|
||||||
if unpackedData.shape[0] * 2 > convertSize:
|
|
||||||
convertSize = unpackedData.shape[0] * 2
|
|
||||||
|
|
||||||
# print("convert Size", convertChunkNum, convertSize)
|
if unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
|
||||||
|
convertSize = int(unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate)) + 1024
|
||||||
|
|
||||||
|
# print("convert Size", unpackedData.shape[0], unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate), convertSize)
|
||||||
|
|
||||||
self._generate_strength(unpackedData)
|
self._generate_strength(unpackedData)
|
||||||
data = self._generate_input(unpackedData, convertSize)
|
data = self._generate_input(unpackedData, convertSize)
|
||||||
|
Loading…
Reference in New Issue
Block a user