From 84860767791fcb9e383563747d229ec9182b28d6 Mon Sep 17 00:00:00 2001 From: wataru Date: Thu, 5 Jan 2023 18:56:02 +0900 Subject: [PATCH] cpu --- client/demo/src/index.tsx | 2 +- client/demo/src/options_microphone.tsx | 5 +--- .../src/voice-changer-worklet-processor.ts | 2 +- server/voice_changer/VoiceChanger.py | 27 +++++++++++++++---- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/client/demo/src/index.tsx b/client/demo/src/index.tsx index 687ef912..d4a427be 100644 --- a/client/demo/src/index.tsx +++ b/client/demo/src/index.tsx @@ -149,7 +149,7 @@ const App = () => { <>
monitor:
-
vol(db):{volume.toFixed(4)}
+
vol(rms):{volume.toFixed(4)}
buf(ms):{bufferingTime}
res(ms):{responseTime}
diff --git a/client/demo/src/options_microphone.tsx b/client/demo/src/options_microphone.tsx index 1963ffd7..e297f653 100644 --- a/client/demo/src/options_microphone.tsx +++ b/client/demo/src/options_microphone.tsx @@ -133,12 +133,9 @@ export const useMicrophoneOptions = (audioContext?: AudioContext): MicrophoneOpt }, [inputAudioDeviceInfo, audioInput]) const audioMediaInputRow = useMemo(() => { - console.log("GEN:audioMediaInputRow1") if (audioInput != "file") { - console.log("GEN:audioMediaInputRow2") return <> } - console.log("GEN:audioMediaInputRow3") const onFileLoadClicked = async () => { const url = await fileSelectorAsDataURL("") @@ -185,7 +182,7 @@ export const useMicrophoneOptions = (audioContext?: AudioContext): MicrophoneOpt
) }, [audioInput, audioOutput]) - console.log("GEN:audioMediaInputRow3") + useEffect(() => { if (!audioContext) { return diff --git a/client/lib/worklet/src/voice-changer-worklet-processor.ts b/client/lib/worklet/src/voice-changer-worklet-processor.ts index c7d660d7..3de70845 100644 --- a/client/lib/worklet/src/voice-changer-worklet-processor.ts +++ b/client/lib/worklet/src/voice-changer-worklet-processor.ts @@ -58,7 +58,7 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor { } if (this.playBuffer.length === 0) { - console.log("[worklet] no play buffer") + // console.log("[worklet] no play buffer") return true } diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py index 390770c6..3567c2c8 100755 --- a/server/voice_changer/VoiceChanger.py +++ b/server/voice_changer/VoiceChanger.py @@ -112,8 +112,25 @@ class VoiceChanger(): x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [ x.cpu() for x in data] sid_tgt1 = torch.LongTensor([dstId]).cpu() - audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[ 0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy() + audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value) + + if self.prev_strength.device != torch.device('cpu'): + print(f"prev_strength move from {self.prev_strength.device} to cpu") + self.prev_strength = self.prev_strength.cpu() + if self.cur_strength.device != torch.device('cpu'): + print(f"cur_strength move from {self.cur_strength.device} to cpu") + self.cur_strength = self.cur_strength.cpu() + + if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'): + prev = self.prev_audio1[-1*unpackedData.shape[0]:] + cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]] + result = prev * self.prev_strength + cur * self.cur_strength + else: + cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
+ result = cur + + self.prev_audio1 = audio1 + result = result.cpu().float().numpy() # elif self.mps_enabled == True: # MPS doesn't support aten::weight_norm_interface, and PYTORCH_ENABLE_MPS_FALLBACK=1 causes a big delay. # x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [ # x.to("mps") for x in data] @@ -137,15 +154,15 @@ - if hasattr(self, 'prev_audio1') == True: + if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', gpu): prev = self.prev_audio1[-1*unpackedData.shape[0]:] cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]] result = prev * self.prev_strength + cur * self.cur_strength - # print("merging...", prev.shape, cur.shape) + print("merging...", prev.shape, cur.shape) else: cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]] result = cur - # print("no merging...", cur.shape) + print("no merging...", cur.shape) self.prev_audio1 = audio1 #print(result)