Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-02-02 16:23:58 +03:00)
overlap control

Commit 3a04b7cd1f (parent 6ecd5f0236)
client/demo/dist/index.js (vendored, 2 changes)
File diff suppressed because one or more lines are too long
@@ -84,6 +84,22 @@ export const useConvertSetting = (props: UseConvertSettingProps): ConvertSetting
         )
     }, [props.clientState.settingState])
 
+    const crossFadeOverlapRateRow = useMemo(() => {
+        return (
+            <div className="body-row split-3-7 left-padding-1 guided">
+                <div className="body-item-title left-padding-1">Cross Fade Overlap Rate</div>
+                <div className="body-input-container">
+                    <input type="number" min={0.1} max={1} step={0.1} value={props.clientState.settingState.crossFadeOverlapRate} onChange={(e) => {
+                        props.clientState.setSettingState({
+                            ...props.clientState.settingState,
+                            crossFadeOverlapRate: Number(e.target.value)
+                        })
+                    }} />
+                </div>
+            </div>
+        )
+    }, [props.clientState.settingState])
+
     const crossFadeOffsetRateRow = useMemo(() => {
         return (
             <div className="body-row split-3-7 left-padding-1 guided">
@@ -128,11 +144,12 @@ export const useConvertSetting = (props: UseConvertSettingProps): ConvertSetting
             {inputChunkNumRow}
             {convertChunkNumRow}
             {gpuRow}
+            {crossFadeOverlapRateRow}
             {crossFadeOffsetRateRow}
             {crossFadeEndRateRow}
         </>
     )
-    }, [bufferSizeRow, inputChunkNumRow, convertChunkNumRow, gpuRow, crossFadeOffsetRateRow, crossFadeEndRateRow])
+    }, [bufferSizeRow, inputChunkNumRow, convertChunkNumRow, gpuRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow])
 
     return {
         convertSetting,
@@ -34,6 +34,7 @@ export type SettingState = {
     gpu: number
     crossFadeOffsetRate: number
     crossFadeEndRate: number
+    crossFadeOverlapRate: number
 
     // advanced setting
     vfForceDisabled: boolean
@@ -64,6 +65,8 @@ const InitialSettingState: SettingState = {
     gpu: DefaultVoiceChangerRequestParamas.gpu,
     crossFadeOffsetRate: DefaultVoiceChangerRequestParamas.crossFadeOffsetRate,
     crossFadeEndRate: DefaultVoiceChangerRequestParamas.crossFadeEndRate,
+    crossFadeOverlapRate: DefaultVoiceChangerRequestParamas.crossFadeOverlapRate,
+
     vfForceDisabled: DefaultVoiceChangerOptions.forceVfDisable,
     voiceChangerMode: DefaultVoiceChangerOptions.voiceChangerMode
 }
@@ -325,6 +328,16 @@ export const useClient = (props: UseClientProps): ClientState => {
         })()
     }, [settingState.crossFadeEndRate])
 
+    // (f) crossfade setting 3
+    useEffect(() => {
+        (async () => {
+            await initializedPromise
+            const info = await voiceChangerClientRef.current!.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, "" + settingState.crossFadeOverlapRate)
+            setServerInfo(info)
+        })()
+    }, [settingState.crossFadeOverlapRate])
+
+
     // (2-5) advanced setting
     //// VFDisable is configured together with the input settings.
     // (a) voice changer mode
@@ -394,7 +407,8 @@ export const useClient = (props: UseClientProps): ClientState => {
         if (colab == "true") {
             setSettingState({
                 ...settingState,
-                protocol: "rest"
+                protocol: "rest",
+                inputChunkNum: 64
             })
         }
     }, [])
client/lib/package-lock.json (generated, 4 changes)
@@ -1,12 +1,12 @@
 {
   "name": "@dannadori/voice-changer-client-js",
-  "version": "1.0.5",
+  "version": "1.0.6",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "@dannadori/voice-changer-client-js",
-      "version": "1.0.5",
+      "version": "1.0.6",
       "license": "ISC",
       "dependencies": {
         "@types/readable-stream": "^2.3.15",
client/lib/package.json

@@ -1,6 +1,6 @@
 {
   "name": "@dannadori/voice-changer-client-js",
-  "version": "1.0.5",
+  "version": "1.0.6",
   "description": "",
   "main": "dist/index.js",
   "directories": {
@@ -13,6 +13,7 @@ export type VoiceChangerRequestParamas = {
     crossFadeLowerValue: number,
     crossFadeOffsetRate: number,
     crossFadeEndRate: number,
+    crossFadeOverlapRate: number,
 
 }
 
@@ -106,6 +107,7 @@ export const ServerSettingKey = {
     "gpu": "gpu",
     "crossFadeOffsetRate": "crossFadeOffsetRate",
     "crossFadeEndRate": "crossFadeEndRate",
+    "crossFadeOverlapRate": "crossFadeOverlapRate",
     "framework": "framework",
     "onnxExecutionProvider": "onnxExecutionProvider"
 } as const
@@ -119,7 +121,8 @@ export const DefaultVoiceChangerRequestParamas: VoiceChangerRequestParamas = {
     gpu: 0,
     crossFadeLowerValue: 0.1,
     crossFadeOffsetRate: 0.1,
-    crossFadeEndRate: 0.9
+    crossFadeEndRate: 0.9,
+    crossFadeOverlapRate: 0.5
 }
 
 export const DefaultVoiceChangerOptions: VoiceChangerOptions = {
@@ -78,6 +78,9 @@ if __name__ == thisFilename or args.colab == True:
     MODEL = args.m if args.m != None else None
     ONNX_MODEL = args.o if args.o != None else None
 
+    if args.colab == True:
+        os.environ["colab"] = True
+
     # if os.getenv("EX_TB_PORT"):
     #     EX_TB_PORT = os.environ["EX_TB_PORT"]
     #     exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT)
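A caveat on the hunk above: os.environ only accepts str values in CPython, so the added assignment raises TypeError ("str expected, not bool") as soon as the colab branch runs. A minimal sketch of the string form that would execute, with the matching string comparison on read:

    import os

    # os.environ values must be str, so store the flag as a string
    os.environ["colab"] = "true"

    # readers then compare against the string form
    if os.environ.get("colab") == "true":
        print("running under colab")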
@@ -24,6 +24,7 @@ class VocieChangerSettings():
     dstId:int = 100
     crossFadeOffsetRate:float = 0.1
     crossFadeEndRate:float = 0.9
+    crossFadeOverlapRate:float = 0.9
     convertChunkNum:int = 32
     framework:str = "PyTorch" # PyTorch or ONNX
     pyTorchModelFile:str = ""
@@ -31,7 +32,7 @@ class VocieChangerSettings():
     configFile:str = ""
     # ↓ list only the mutable fields
     intData = ["gpu","srcId", "dstId", "convertChunkNum"]
-    floatData = [ "crossFadeOffsetRate", "crossFadeEndRate",]
+    floatData = [ "crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate"]
     strData = ["framework"]
 
 class VoiceChanger():
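The per-type lists above let the server cast incoming settings, which arrive as strings (see the "" + settingState.crossFadeOverlapRate call on the client side), back to the right Python type by key. The update routine itself is not part of this diff; the following is a minimal sketch of the pattern with a hypothetical update_setting helper:

    from dataclasses import dataclass

    @dataclass
    class Settings:
        # mirrors the mutable fields declared on VocieChangerSettings
        gpu: int = 0
        crossFadeOffsetRate: float = 0.1
        crossFadeEndRate: float = 0.9
        crossFadeOverlapRate: float = 0.9
        framework: str = "PyTorch"

        intData = ["gpu"]
        floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate"]
        strData = ["framework"]

    def update_setting(settings: Settings, key: str, val: str) -> None:
        # hypothetical helper: pick the cast by key membership
        if key in settings.intData:
            setattr(settings, key, int(val))
        elif key in settings.floatData:
            setattr(settings, key, float(val))
        elif key in settings.strData:
            setattr(settings, key, val)

    s = Settings()
    update_setting(s, "crossFadeOverlapRate", "0.5")
    print(s.crossFadeOverlapRate)  # 0.5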
@@ -44,6 +45,8 @@ class VoiceChanger():
         self.onnx_session = None
         self.currentCrossFadeOffsetRate=0
         self.currentCrossFadeEndRate=0
+        self.currentCrossFadeOverlapRate=0
+
         # gather commonly used information
         self.hps = utils.get_hparams_from_file(config)
         self.gpu_num = torch.cuda.device_count()
@@ -133,20 +136,24 @@ class VoiceChanger():
 
     def _generate_strength(self, unpackedData):
 
-        if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate :
+        if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapRate != self.settings.crossFadeOverlapRate:
             self.unpackedData_length = unpackedData.shape[0]
             self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
             self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
-            cf_offset = int(unpackedData.shape[0] * self.settings.crossFadeOffsetRate)
-            cf_end = int(unpackedData.shape[0] * self.settings.crossFadeEndRate)
+            self.currentCrossFadeOverlapRate = self.settings.crossFadeOverlapRate
+
+            overlapSize = int(unpackedData.shape[0] * self.settings.crossFadeOverlapRate)
+
+            cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate)
+            cf_end = int(overlapSize * self.settings.crossFadeEndRate)
             cf_range = cf_end - cf_offset
             percent = np.arange(cf_range) / cf_range
 
             np_prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
             np_cur_strength = np.cos((1-percent) * 0.5 * np.pi) ** 2
 
-            self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(unpackedData.shape[0]-cf_offset-len(np_prev_strength))])
-            self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(unpackedData.shape[0]-cf_offset-len(np_cur_strength))])
+            self.np_prev_strength = np.concatenate([np.ones(cf_offset), np_prev_strength, np.zeros(overlapSize - cf_offset - len(np_prev_strength))])
+            self.np_cur_strength = np.concatenate([np.zeros(cf_offset), np_cur_strength, np.ones(overlapSize - cf_offset - len(np_cur_strength))])
 
             self.prev_strength = torch.FloatTensor(self.np_prev_strength)
             self.cur_strength = torch.FloatTensor(self.np_cur_strength)
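The two envelopes built here are complementary raised cosines: cos((1-percent)*pi/2) equals sin(percent*pi/2), so cos^2 + sin^2 = 1 and the previous and current chunks always sum to unit gain across the fade; the ones/zeros padding preserves that invariant over the whole overlap window, which now spans overlapSize samples rather than the full chunk. A standalone sketch with illustrative sizes (a 4096-sample chunk and the server-side defaults; not values taken from this diff):

    import numpy as np

    chunk = 4096
    overlap_rate, offset_rate, end_rate = 0.9, 0.1, 0.9

    overlap = int(chunk * overlap_rate)     # 3686 samples are cross-faded
    cf_offset = int(overlap * offset_rate)  # fade starts 368 samples in
    cf_end = int(overlap * end_rate)        # fade finishes at sample 3317
    percent = np.arange(cf_end - cf_offset) / (cf_end - cf_offset)

    prev_env = np.cos(percent * 0.5 * np.pi) ** 2        # 1 -> 0
    cur_env = np.cos((1 - percent) * 0.5 * np.pi) ** 2   # 0 -> 1

    # pad to the full overlap window: prev holds at 1 before the fade,
    # cur holds at 1 after it; their sum is 1 everywhere
    prev_full = np.concatenate([np.ones(cf_offset), prev_env, np.zeros(overlap - cf_end)])
    cur_full = np.concatenate([np.zeros(cf_offset), cur_env, np.ones(overlap - cf_end)])
    assert np.allclose(prev_full + cur_full, 1.0)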
@@ -199,16 +206,19 @@ class VoiceChanger():
                 "sid_tgt": sid_tgt1.numpy()
             })[0][0,0] * self.hps.data.max_wav_value
             if hasattr(self, 'np_prev_audio1') == True:
-                prev = self.np_prev_audio1[-1*inputSize:]
-                cur = audio1[-2*inputSize:-1*inputSize]
-                # print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape)
-                powered_prev = prev * self.np_prev_strength
-                powered_cur = cur * self.np_cur_strength
-                result = powered_prev + powered_cur
-                #result = prev * self.np_prev_strength + cur * self.np_cur_strength
+                overlapSize = int(inputSize * self.settings.crossFadeOverlapRate)
+                prev_overlap = self.np_prev_audio1[-1*overlapSize:]
+                cur_overlap = audio1[-1*(inputSize + overlapSize):-1*inputSize]
+                # print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
+                # print(">>>>>>>>>>>", -1*(inputSize + overlapSize), -1*inputSize)
+                powered_prev = prev_overlap * self.np_prev_strength
+                powered_cur = cur_overlap * self.np_cur_strength
+                powered_result = powered_prev + powered_cur
+
+                cur = audio1[-1*inputSize:-1*overlapSize]
+                result = np.concatenate([powered_result, cur], axis=0)
             else:
-                cur = audio1[-2*inputSize:-1*inputSize]
-                result = cur
+                result = np.zeros(1).astype(np.int16)
             self.np_prev_audio1 = audio1
             return result
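Net effect of this hunk: only the last overlapSize samples of the previous output are faded against the matching slice of the new buffer, and the un-overlapped remainder of the new chunk is appended untouched (previously the entire inputSize window was cross-faded). A self-contained sketch of the splice on synthetic audio, using linear envelopes in place of the cos^2 ones for brevity:

    import numpy as np

    input_size = 1024                   # samples emitted per request
    overlap = int(input_size * 0.5)     # crossFadeOverlapRate = 0.5

    fade = np.arange(overlap) / overlap
    prev_strength, cur_strength = 1.0 - fade, fade   # complementary envelopes

    prev_audio = np.random.randn(4096)  # previously converted buffer
    audio = np.random.randn(4096)       # freshly converted buffer

    # cross-fade the tail of the previous output with the matching
    # region of the new buffer...
    prev_overlap = prev_audio[-overlap:]
    cur_overlap = audio[-(input_size + overlap):-input_size]
    powered = prev_overlap * prev_strength + cur_overlap * cur_strength

    # ...then append the un-overlapped remainder of the new chunk
    cur_tail = audio[-input_size:-overlap]
    result = np.concatenate([powered, cur_tail])
    assert len(result) == input_size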
@@ -274,10 +284,11 @@ class VoiceChanger():
 
     def on_request(self, unpackedData:any):
         convertSize = self.settings.convertChunkNum * 128 # 128 samples / 1 chunk
-        if unpackedData.shape[0] * 2 > convertSize:
-            convertSize = unpackedData.shape[0] * 2
-
-        # print("convert Size", convertChunkNum, convertSize)
+        if unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
+            convertSize = int(unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate)) + 1024
+
+        # print("convert Size", unpackedData.shape[0], unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate), convertSize)
+
         self._generate_strength(unpackedData)
         data = self._generate_input(unpackedData, convertSize)
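The buffer sizing now tracks the overlap: the model only has to convert the incoming chunk plus its overlap tail (plus a fixed 1024-sample margin) instead of a flat 2x the chunk. A quick worked check with hypothetical sizes:

    convert_chunk_num = 32                  # dataclass default
    convert_size = convert_chunk_num * 128  # 4096 samples (128 samples/chunk)

    chunk = 4096
    overlap_rate = 0.5
    needed = int(chunk * (1 + overlap_rate)) + 1024  # 4096*1.5 + 1024 = 7168

    if needed > convert_size:
        convert_size = needed               # widened to 7168

    # the old rule demanded 2*chunk = 8192; for a 4096-sample chunk the
    # overlap-aware bound is smaller whenever the overlap rate < 0.75
    print(convert_size)  # 7168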