wataru 2023-01-05 18:56:02 +09:00
parent 07a14e6bcd
commit 8486076779
4 changed files with 25 additions and 11 deletions

View File

@@ -149,7 +149,7 @@ const App = () => {
     <>
         <div className="body-row split-3-1-1-1-4 left-padding-1 highlight">
             <div className="body-item-title">monitor:</div>
-            <div className="body-item-text">vol(db):{volume.toFixed(4)}</div>
+            <div className="body-item-text">vol(rms):{volume.toFixed(4)}</div>
             <div className="body-item-text">buf(ms):{bufferingTime}</div>
             <div className="body-item-text">res(ms):{responseTime}</div>
             <div className="body-item-text"></div>
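The relabeling from vol(db) to vol(rms) indicates the monitor value is a root-mean-square amplitude rather than a decibel figure. A minimal sketch of how such a value could be computed from an audio frame (the helper name calcRms and the Float32Array frame source are assumptions, not part of this commit):

const calcRms = (frame: Float32Array): number => {
    // RMS: square root of the mean of squared samples; stays in [0, 1] for normalized audio.
    let sumSq = 0
    for (let i = 0; i < frame.length; i++) {
        sumSq += frame[i] * frame[i]
    }
    return Math.sqrt(sumSq / frame.length)
}

A value produced this way would then be rendered by the vol(rms) row above via volume.toFixed(4).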

View File

@@ -133,12 +133,9 @@ export const useMicrophoneOptions = (audioContext?: AudioContext): MicrophoneOpt
     }, [inputAudioDeviceInfo, audioInput])
     const audioMediaInputRow = useMemo(() => {
-        console.log("GEN:audioMediaInputRow1")
         if (audioInput != "file") {
-            console.log("GEN:audioMediaInputRow2")
             return <></>
         }
-        console.log("GEN:audioMediaInputRow3")
         const onFileLoadClicked = async () => {
             const url = await fileSelectorAsDataURL("")
@@ -185,7 +182,7 @@ export const useMicrophoneOptions = (audioContext?: AudioContext): MicrophoneOpt
             </div>
         )
     }, [audioInput, audioOutput])
-    console.log("GEN:audioMediaInputRow3")
     useEffect(() => {
         if (!audioContext) {
             return
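The removed GEN: logs appear to have been tracing when the memoized audioMediaInputRow was regenerated; with the dependency list [audioInput, audioOutput] settled, they are no longer needed. A minimal sketch of that tracing pattern, with illustrative names that are not from this file:

import React, { useMemo } from "react"

const MonitorRow = (props: { audioInput: string; audioOutput: string }) => {
    const row = useMemo(() => {
        // Fires only when audioInput or audioOutput actually changes.
        console.log("GEN:row")
        return <div>{props.audioInput} / {props.audioOutput}</div>
    }, [props.audioInput, props.audioOutput])
    return row
}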

View File

@@ -58,7 +58,7 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
         }
         if (this.playBuffer.length === 0) {
-            console.log("[worklet] no play buffer")
+            // console.log("[worklet] no play buffer")
             return true
         }
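For context, an AudioWorkletProcessor's process() must keep returning true to stay alive, so a buffer underrun is handled by emitting silence rather than stopping; the change above only silences the per-quantum log. A stripped-down sketch of the pattern (everything beyond the playBuffer idea is assumed, not taken from this file):

class BufferedPlayerProcessor extends AudioWorkletProcessor {
    private playBuffer: Float32Array[] = []

    process(_inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
        if (this.playBuffer.length === 0) {
            // Underrun: leave the zero-filled output untouched and keep the node alive.
            return true
        }
        // Each queued chunk is assumed to match the 128-sample render quantum.
        outputs[0][0].set(this.playBuffer.shift()!)
        return true
    }
}

registerProcessor("buffered-player-processor", BufferedPlayerProcessor)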

View File

@@ -112,8 +112,25 @@ class VoiceChanger():
            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
                x.cpu() for x in data]
            sid_tgt1 = torch.LongTensor([dstId]).cpu()
-            audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
-                0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
+            audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value)
+            if self.prev_strength.device != torch.device('cpu'):
+                print(f"prev_strength move from {self.prev_strength.device} to cpu")
+                self.prev_strength = self.prev_strength.cpu()
+            if self.cur_strength.device != torch.device('cpu'):
+                print(f"cur_strength move from {self.cur_strength.device} to cpu")
+                self.cur_strength = self.cur_strength.cpu()
+            if hasattr(self, 'prev_audio1') and self.prev_audio1.device == torch.device('cpu'):
+                prev = self.prev_audio1[-1 * unpackedData.shape[0]:]
+                cur = audio1[-2 * unpackedData.shape[0]:-1 * unpackedData.shape[0]]
+                result = prev * self.prev_strength + cur * self.cur_strength
+            else:
+                cur = audio1[-2 * unpackedData.shape[0]:-1 * unpackedData.shape[0]]
+                result = cur
+            self.prev_audio1 = audio1
+            result = result.cpu().float().numpy()
         # elif self.mps_enabled == True:  # MPS doesn't support aten::weight_norm_interface, and PYTORCH_ENABLE_MPS_FALLBACK=1 causes a big delay.
         #     x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
         #         x.to("mps") for x in data]
@@ -137,15 +154,15 @@ class VoiceChanger():
-            if hasattr(self, 'prev_audio1') == True:
+            if hasattr(self, 'prev_audio1') and self.prev_audio1.device == torch.device('cuda', gpu):
                 prev = self.prev_audio1[-1 * unpackedData.shape[0]:]
                 cur = audio1[-2 * unpackedData.shape[0]:-1 * unpackedData.shape[0]]
                 result = prev * self.prev_strength + cur * self.cur_strength
-                # print("merging...", prev.shape, cur.shape)
+                print("merging...", prev.shape, cur.shape)
             else:
                 cur = audio1[-2 * unpackedData.shape[0]:-1 * unpackedData.shape[0]]
                 result = cur
-                # print("no merging...", cur.shape)
+                print("no merging...", cur.shape)
             self.prev_audio1 = audio1
             # print(result)
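Both the CPU and CUDA branches above implement the same overlap-crossfade idea: each inference window overlaps the previous one by one chunk, the tail of prev_audio1 is faded out with prev_strength while the matching slice of the new audio1 is faded in with cur_strength, smoothing the seam between consecutive chunks. A self-contained sketch of that blend (names and the linear ramps are illustrative; the actual strength tensors are precomputed elsewhere in this class):

// Complementary linear ramps: fadeOut[i] + fadeIn[i] === 1 at every sample.
const makeRamps = (n: number) => {
    const fadeIn = new Float32Array(n)
    const fadeOut = new Float32Array(n)
    for (let i = 0; i < n; i++) {
        fadeIn[i] = (i + 1) / n
        fadeOut[i] = 1 - fadeIn[i]
    }
    return { fadeIn, fadeOut }
}

// Blend the overlapping regions of two consecutive conversion outputs.
const crossfade = (prevTail: Float32Array, curHead: Float32Array): Float32Array => {
    const { fadeIn, fadeOut } = makeRamps(curHead.length)
    const out = new Float32Array(curHead.length)
    for (let i = 0; i < out.length; i++) {
        out[i] = prevTail[i] * fadeOut[i] + curHead[i] * fadeIn[i]
    }
    return out
}

In the diff, prev = self.prev_audio1[-N:] plays the role of prevTail and cur = audio1[-2N:-N] the role of curHead, where N = unpackedData.shape[0].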