From 808278364462002107eadc07968f0db3afbc8884 Mon Sep 17 00:00:00 2001
From: wataru
Date: Sun, 8 Jan 2023 17:58:27 +0900
Subject: [PATCH] WIP: alpha

---
 client/demo/src/100_options_microphone.tsx  |  43 ++--
 client/demo/src/103_speaker_setting.tsx     |   1 -
 client/demo/src/106_server_control.tsx      |   2 +-
 client/demo/src/hooks/useClient.ts          |   2 -
 client/lib/src/VoiceChangerClient.ts        |   5 +-
 server/const.py                             |   5 +
 server/sio/MMVC_Namespace.py                |  20 +-
 server/voice_changer/VoiceChanger.py        | 220 ++++++++++----------
 server/voice_changer/VoiceChangerManager.py |  16 +-
 9 files changed, 141 insertions(+), 173 deletions(-)

diff --git a/client/demo/src/100_options_microphone.tsx b/client/demo/src/100_options_microphone.tsx
index 323ac667..fd6e7200 100644
--- a/client/demo/src/100_options_microphone.tsx
+++ b/client/demo/src/100_options_microphone.tsx
@@ -53,22 +53,22 @@ export const useMicrophoneOptions = () => {
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.setServerUrl(serverSetting.mmvcServerUrl)
-    }, [serverSetting.mmvcServerUrl])
+    }, [clientState.clientInitialized, serverSetting.mmvcServerUrl])
     //// Protocol change
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.setProtocol(serverSetting.protocol)
-    }, [serverSetting.protocol])
+    }, [clientState.clientInitialized, serverSetting.protocol])
     //// Framework change
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.framework, serverSetting.framework)
-    }, [serverSetting.framework])
+    }, [clientState.clientInitialized, serverSetting.framework])
     //// OnnxExecutionProvider change
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.onnxExecutionProvider, serverSetting.onnxExecutionProvider)
-    }, [serverSetting.onnxExecutionProvider])
+    }, [clientState.clientInitialized, serverSetting.onnxExecutionProvider])

     // 102 DeviceSetting
     //// Input settings
     useEffect(() => {
@@ -82,58 +82,39 @@ export const useMicrophoneOptions = () => {
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.srcId, speakerSetting.srcId)
-    }, [speakerSetting.srcId])
+    }, [clientState.clientInitialized, speakerSetting.srcId])
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.dstId, speakerSetting.dstId)
-    }, [speakerSetting.dstId])
+    }, [clientState.clientInitialized, speakerSetting.dstId])

     // 104 ConvertSetting
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.setInputChunkNum(convertSetting.inputChunkNum)
-    }, [convertSetting.inputChunkNum])
+    }, [clientState.clientInitialized, convertSetting.inputChunkNum])
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.convertChunkNum, convertSetting.convertChunkNum)
-    }, [convertSetting.convertChunkNum])
+    }, [clientState.clientInitialized, convertSetting.convertChunkNum])
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.gpu, convertSetting.gpu)
-    }, [convertSetting.gpu])
+    }, [clientState.clientInitialized, convertSetting.gpu])
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.crossFadeOffsetRate, convertSetting.crossFadeOffsetRate)
-    }, [convertSetting.crossFadeOffsetRate])
+    }, [clientState.clientInitialized, convertSetting.crossFadeOffsetRate])
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.updateSettings(ServerSettingKey.crossFadeEndRate, convertSetting.crossFadeEndRate)
-    }, [convertSetting.crossFadeEndRate])
+    }, [clientState.clientInitialized, convertSetting.crossFadeEndRate])

     // 105 AdvancedSetting
     useEffect(() => {
         if (!clientState.clientInitialized) return
         clientState.setVoiceChangerMode(advancedSetting.voiceChangerMode)
-    }, [advancedSetting.voiceChangerMode])
-
-
-    // // const [options, setOptions] = useState(InitMicrophoneOptionsState)
-    // const [params, setParams] = useState(DefaultVoiceChangerRequestParamas)
-    // const [options, setOptions] = useState(DefaultVoiceChangerOptions)
-    // const [isStarted, setIsStarted] = useState(false)
-
-
-    // useEffect(() => {
-    //     const storeOptions = async () => {
-    //         if (CHROME_EXTENSION) {
-    //             // @ts-ignore
-    //             await chrome.storage.local.set({ microphoneOptions: options })
-    //         }
-    //     }
-    //     storeOptions()
-    // }, [options]) // Moving this above the load step might reset what is in storage, so probably no good. (Needs verification)
-
-
+    }, [clientState.clientInitialized, advancedSetting.voiceChangerMode])

     const voiceChangerSetting = useMemo(() => {
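The change applied throughout 100_options_microphone.tsx is React's exhaustive-deps rule: each effect reads clientState.clientInitialized, so it must list it as a dependency. With the old arrays, a setting changed before initialization finished would never be re-applied once the client came up. A minimal sketch of the guard-and-sync pattern (the hook and its names are illustrative, not part of this patch):

    // Sketch: re-run whenever the guard or the value changes.
    import { useEffect } from "react"

    export const useApplySetting = <T,>(ready: boolean, value: T, apply: (v: T) => void) => {
        useEffect(() => {
            if (!ready) return   // skip until the client is initialized
            apply(value)
        }, [ready, value, apply])  // list everything the effect reads
    }
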
diff --git a/client/demo/src/103_speaker_setting.tsx b/client/demo/src/103_speaker_setting.tsx
index 543a01c9..4564c261 100644
--- a/client/demo/src/103_speaker_setting.tsx
+++ b/client/demo/src/103_speaker_setting.tsx
@@ -86,7 +86,6 @@ export const useSpeakerSetting = () => {
                 set
-        )
     }, [speakers, editSpeakerTargetId, editSpeakerTargetName])

diff --git a/client/demo/src/106_server_control.tsx b/client/demo/src/106_server_control.tsx
index bdfd054f..6d664ed8 100644
--- a/client/demo/src/106_server_control.tsx
+++ b/client/demo/src/106_server_control.tsx
@@ -38,7 +38,7 @@ export const useServerControl = (props: UseServerControlProps) => {
             )
-    }, [isStarted])
+    }, [isStarted, props.convertStart, props.convertStop])

     const performanceRow = useMemo(() => {
         return (

diff --git a/client/demo/src/hooks/useClient.ts b/client/demo/src/hooks/useClient.ts
index a2286331..1e9259aa 100644
--- a/client/demo/src/hooks/useClient.ts
+++ b/client/demo/src/hooks/useClient.ts
@@ -94,7 +94,6 @@ export const useClient = (props: UseClientProps): ClientState => {
                 return
             }
             voiceChangerClientRef.current.setProtocol(protocol)
-            voiceChangerClientRef.current.stop()
         }
     }, [])

@@ -105,7 +104,6 @@ export const useClient = (props: UseClientProps): ClientState => {
                 return
             }
             voiceChangerClientRef.current.setInputChunkNum(num)
-            voiceChangerClientRef.current.stop()
         }
     }, [])
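The useClient.ts hunks are behavioral cleanup: setProtocol and setInputChunkNum now apply to the live client without stopping the stream. The 106_server_control.tsx change lists props.convertStart and props.convertStop in the useMemo dependencies, which is only cheap if those callbacks are referentially stable; a parent that recreates them on each render would force the row to recompute every time. A sketch of the usual countermeasure on the caller's side (names are illustrative, not from this patch):

    // Sketch: memoize the handlers so their identity is stable across renders.
    import { useCallback } from "react"

    export const useConvertHandlers = (client: { start: () => void; stop: () => void }) => {
        const convertStart = useCallback(() => { client.start() }, [client])
        const convertStop = useCallback(() => { client.stop() }, [client])
        return { convertStart, convertStop }
    }
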
diff --git a/client/lib/src/VoiceChangerClient.ts b/client/lib/src/VoiceChangerClient.ts
index ed416ce0..8a2e3c27 100644
--- a/client/lib/src/VoiceChangerClient.ts
+++ b/client/lib/src/VoiceChangerClient.ts
@@ -33,6 +33,8 @@ export class VoiceChnagerClient {
     private promiseForInitialize: Promise
     private _isVoiceChanging = false

+    private sslCertified: string[] = []
+
     private callbacks: Callbacks = {
         onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer): void => {
             // console.log(voiceChangerMode, data)
@@ -176,11 +178,12 @@
         const pageUrl = `${location.protocol}//${location.host}`
         console.log("SERVER CHECK", url, pageUrl)

-        if (url != pageUrl && location.protocol == "https:") {
+        if (url != pageUrl && location.protocol == "https:" && this.sslCertified.includes(url) == false) {
             if (openTab) {
                 const value = window.confirm("MMVC Server is different from this page's origin. Open tab to open ssl connection. OK? (You can close the opened tab after the ssl connection succeeds.)");
                 if (value) {
                     window.open(url, '_blank')
+                    this.sslCertified.push(url)
                 } else {
                     alert("Your voice conversion may fail...")
                 }
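The new sslCertified list remembers, for the lifetime of the page, which server origins the user has already opened a certificate tab for, so the confirm dialog fires once per origin instead of on every connection check. The same idea in isolation (standalone names, not the class above):

    // Sketch: prompt once per origin, then remember the answer for the session.
    const certifiedOrigins: string[] = []

    const ensureSslCertified = (serverUrl: string): void => {
        const pageUrl = `${location.protocol}//${location.host}`
        if (serverUrl == pageUrl || location.protocol != "https:") return
        if (certifiedOrigins.includes(serverUrl)) return  // already confirmed
        if (window.confirm("Open a tab to accept the server certificate?")) {
            window.open(serverUrl, "_blank")
            certifiedOrigins.push(serverUrl)
        } else {
            alert("Your voice conversion may fail...")
        }
    }
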
diff --git a/server/const.py b/server/const.py
index 47e9ac93..35fca3a1 100644
--- a/server/const.py
+++ b/server/const.py
@@ -1 +1,6 @@
 frontend_path = "../client/demo/dist"
+
+
+ERROR_NO_ONNX_SESSION = "ERROR_NO_ONNX_SESSION"
+
+

diff --git a/server/sio/MMVC_Namespace.py b/server/sio/MMVC_Namespace.py
index 6b8c03c1..dfde39ac 100644
--- a/server/sio/MMVC_Namespace.py
+++ b/server/sio/MMVC_Namespace.py
@@ -20,24 +20,14 @@ class MMVC_Namespace(socketio.AsyncNamespace):
         pass

     async def on_request_message(self, sid, msg):
-        # print("on_request_message", torch.cuda.memory_allocated())
-        gpu = int(msg[0])
-        srcId = int(msg[1])
-        dstId = int(msg[2])
-        timestamp = int(msg[3])
-        convertChunkNum = int(msg[4])
-        crossFadeLowerValue = float(msg[5])
-        crossFadeOffsetRate = float(msg[6])
-        crossFadeEndRate = float(msg[7])
-        data = msg[8]
+        timestamp = int(msg[0])
+        data = msg[1]
         unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
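The request message shrinks from nine positional fields to two: conversion parameters (gpu, speaker ids, chunk size, crossfade rates) now live in the server-side settings object, updated via update_setting below and the client's updateSettings calls above, so each audio packet carries only a timestamp plus raw little-endian 16-bit PCM, decoded by the struct.unpack line. A self-contained sketch of that decode step (decode_request is an illustrative name):

    # Sketch: decode the slimmed two-field request.
    import struct
    import numpy as np

    def decode_request(msg):
        timestamp = int(msg[0])
        data = msg[1]                                  # bytes of int16 samples
        count = len(data) // struct.calcsize('<h')     # '<h' = 2 bytes per sample
        samples = np.array(struct.unpack('<%sh' % count, data))
        return timestamp, samples
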
diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
+            if key == "gpu" and val >= 0 and val < self.gpu_num and hasattr(self, "onnx_session"):
+                providers = self.onnx_session.get_providers()
+                print("Providers::::", providers)
+                if "CUDAExecutionProvider" in providers:
+                    provider_options=[{'device_id': self.settings.gpu}]
+                    self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
             return self.get_info()
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
@@ -113,22 +124,7 @@ class VoiceChanger():
             print(f"{key} is not a mutable variable!")
             return self.get_info()

-
-    # def set_gpu(self, gpu:int):
-    #     self.settings.gpu = gpu
-    #     return {"gpu":self.settings.gpu}
-
-    # def set_crossfade_setting(self, crossFadeOffsetRate:float, crossFadeEndRate:float):
-    #     self.settings.crossFadeOffsetRate = crossFadeOffsetRate
-    #     self.settings.crossFadeEndRate = crossFadeEndRate
-    #     self.unpackedData_length = 0  # to force strength recalculation at the next VC
-
-    # def set_conversion_setting(self, srcId:int, dstId:int):
-    #     self.settings.srcId = srcId
-    #     self.settings.dstId = dstId
-
-    # def set_convert_chunk_num(self, convertChunkNum):
-    #     self.settings.convertChunkNum = convertChunkNum

     def _generate_strength(self, unpackedData):
@@ -179,6 +175,91 @@ class VoiceChanger():

         return data

+    def _onnx_inference(self, data, inputSize):
+        if hasattr(self, 'onnx_session'):
+            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
+            sid_tgt1 = torch.LongTensor([self.settings.dstId])
+            # if spec.size()[2] >= 8:
+            audio1 = self.onnx_session.run(
+                ["audio"],
+                {
+                    "specs": spec.numpy(),
+                    "lengths": spec_lengths.numpy(),
+                    "sid_src": sid_src.numpy(),
+                    "sid_tgt": sid_tgt1.numpy()
+                })[0][0,0] * self.hps.data.max_wav_value
+            if hasattr(self, 'np_prev_audio1') == True:
+                prev = self.np_prev_audio1[-1*inputSize:]
+                cur = audio1[-2*inputSize:-1*inputSize]
+                # print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape)
+                powered_prev = prev * self.np_prev_strength
+                powered_cur = cur * self.np_cur_strength
+                result = powered_prev + powered_cur
+                #result = prev * self.np_prev_strength + cur * self.np_cur_strength
+            else:
+                cur = audio1[-2*inputSize:-1*inputSize]
+                result = cur
+            self.np_prev_audio1 = audio1
+            return result
+        else:
+            raise ValueError(ERROR_NO_ONNX_SESSION, "No ONNX Session.")
+
+    def _pyTorch_inference(self, data, inputSize):
+        if self.settings.gpu < 0 or self.gpu_num == 0:
+            with torch.no_grad():
+                x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cpu() for x in data]
+                sid_tgt1 = torch.LongTensor([self.settings.dstId]).cpu()
+                audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value)
+
+                if self.prev_strength.device != torch.device('cpu'):
+                    print(f"prev_strength move from {self.prev_strength.device} to cpu")
+                    self.prev_strength = self.prev_strength.cpu()
+                if self.cur_strength.device != torch.device('cpu'):
+                    print(f"cur_strength move from {self.cur_strength.device} to cpu")
+                    self.cur_strength = self.cur_strength.cpu()
+
+                if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'):
+                    prev = self.prev_audio1[-1*inputSize:]
+                    cur = audio1[-2*inputSize:-1*inputSize]
+                    result = prev * self.prev_strength + cur * self.cur_strength
+                else:
+                    cur = audio1[-2*inputSize:-1*inputSize]
+                    result = cur
+
+                self.prev_audio1 = audio1
+            result = result.cpu().float().numpy()
+
+        else:
+            with torch.no_grad():
+                x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cuda(self.settings.gpu) for x in data]
+                sid_tgt1 = torch.LongTensor([self.settings.dstId]).cuda(self.settings.gpu)
+                audio1 = self.net_g.cuda(self.settings.gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value
+
+                if self.prev_strength.device != torch.device('cuda', self.settings.gpu):
+                    print(f"prev_strength move from {self.prev_strength.device} to gpu{self.settings.gpu}")
+                    self.prev_strength = self.prev_strength.cuda(self.settings.gpu)
+                if self.cur_strength.device != torch.device('cuda', self.settings.gpu):
+                    print(f"cur_strength move from {self.cur_strength.device} to gpu{self.settings.gpu}")
+                    self.cur_strength = self.cur_strength.cuda(self.settings.gpu)
+
+                if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.settings.gpu):
+                    prev = self.prev_audio1[-1*inputSize:]
+                    cur = audio1[-2*inputSize:-1*inputSize]
+                    result = prev * self.prev_strength + cur * self.cur_strength
+                    # print("merging...", prev.shape, cur.shape)
+                else:
+                    cur = audio1[-2*inputSize:-1*inputSize]
+                    result = cur
+                    # print("no merging...", cur.shape)
+                self.prev_audio1 = audio1
+
+            #print(result)
+            result = result.cpu().float().numpy()
+        return result
+
+
     def on_request(self, unpackedData:any):
         convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
         if unpackedData.shape[0] * 2 > convertSize:
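Both _onnx_inference and _pyTorch_inference end with the same overlap-add: the tail of the previously converted chunk is blended with the matching region of the new chunk using the prev/cur strength envelopes prepared by _generate_strength (whose body is not shown in this patch). A standalone sketch of that blend, assuming raised-cosine envelopes rather than the real ones:

    # Sketch: crossfade the previous tail into the new head so chunk
    # boundaries don't click. The envelope shape here is an assumption,
    # not taken from _generate_strength.
    import numpy as np

    def crossfade(prev_tail: np.ndarray, cur_head: np.ndarray) -> np.ndarray:
        n = len(prev_tail)
        t = np.linspace(0.0, 1.0, n)
        prev_strength = np.cos(t * 0.5 * np.pi) ** 2   # fades 1 -> 0
        cur_strength = np.sin(t * 0.5 * np.pi) ** 2    # fades 0 -> 1, sums to 1
        return prev_tail * prev_strength + cur_head * cur_strength
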
@@ -189,96 +270,21 @@ class VoiceChanger():
         self._generate_strength(unpackedData)
         data = self._generate_input(unpackedData, convertSize)

-        # try:
-        #     # if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled):
-        #     if self.gpu == -2 and hasattr(self, 'onnx_session') == True:
-        #         x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
-        #         sid_tgt1 = torch.LongTensor([self.dstId])
-        #         # if spec.size()[2] >= 8:
-        #         audio1 = self.onnx_session.run(
-        #             ["audio"],
-        #             {
-        #                 "specs": spec.numpy(),
-        #                 "lengths": spec_lengths.numpy(),
-        #                 "sid_src": sid_src.numpy(),
-        #                 "sid_tgt": sid_tgt1.numpy()
-        #             })[0][0,0] * self.hps.data.max_wav_value
-        #         if hasattr(self, 'np_prev_audio1') == True:
-        #             prev = self.np_prev_audio1[-1*unpackedData.shape[0]:]
-        #             cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
-        #             # print(prev.shape, self.np_prev_strength.shape, cur.shape, self.np_cur_strength.shape)
-        #             powered_prev = prev * self.np_prev_strength
-        #             powered_cur = cur * self.np_cur_strength
-        #             result = powered_prev + powered_cur
-        #             #result = prev * self.np_prev_strength + cur * self.np_cur_strength
-        #         else:
-        #             cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
-        #             result = cur
-        #         self.np_prev_audio1 = audio1
-        #     elif self.gpu < 0 or self.gpu_num == 0:
-        #         with torch.no_grad():
-        #             x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
-        #                 x.cpu() for x in data]
-        #             sid_tgt1 = torch.LongTensor([self.dstId]).cpu()
-        #             audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value)
-
-        #             if self.prev_strength.device != torch.device('cpu'):
-        #                 print(f"prev_strength move from {self.prev_strength.device} to cpu")
-        #                 self.prev_strength = self.prev_strength.cpu()
-        #             if self.cur_strength.device != torch.device('cpu'):
-        #                 print(f"cur_strength move from {self.cur_strength.device} to cpu")
-        #                 self.cur_strength = self.cur_strength.cpu()
-
-        #             if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'):
-        #                 prev = self.prev_audio1[-1*unpackedData.shape[0]:]
-        #                 cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
-        #                 result = prev * self.prev_strength + cur * self.cur_strength
-        #             else:
-        #                 cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
-        #                 result = cur
-
-        #             self.prev_audio1 = audio1
-        #             result = result.cpu().float().numpy()
-
-        #     else:
-        #         with torch.no_grad():
-        #             x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cuda(self.gpu) for x in data]
-        #             sid_tgt1 = torch.LongTensor([self.dstId]).cuda(self.gpu)
-        #             audio1 = self.net_g.cuda(self.gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value
-
-        #             if self.prev_strength.device != torch.device('cuda', self.gpu):
-        #                 print(f"prev_strength move from {self.prev_strength.device} to gpu{self.gpu}")
-        #                 self.prev_strength = self.prev_strength.cuda(self.gpu)
-        #             if self.cur_strength.device != torch.device('cuda', self.gpu):
-        #                 print(f"cur_strength move from {self.cur_strength.device} to gpu{self.gpu}")
-        #                 self.cur_strength = self.cur_strength.cuda(self.gpu)
-
-        #             if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.gpu):
-        #                 prev = self.prev_audio1[-1*unpackedData.shape[0]:]
-        #                 cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
-        #                 result = prev * self.prev_strength + cur * self.cur_strength
-        #                 # print("merging...", prev.shape, cur.shape)
-        #             else:
-        #                 cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
-        #                 result = cur
-        #                 # print("no merging...", cur.shape)
-        #             self.prev_audio1 = audio1
-
-        #             #print(result)
-        #             result = result.cpu().float().numpy()
-
-        # except Exception as e:
-        #     print("VC PROCESSING!!!! EXCEPTION!!!", e)
-        #     print(traceback.format_exc())
-        #     del self.np_prev_audio1
-        #     del self.prev_audio1
-
-        # result = result.astype(np.int16)
-        # # print("on_request result size:",result.shape)
-        # return result
-        return
+        try:
+            if self.settings.framework == "ONNX":
+                result = self._onnx_inference(data, unpackedData.shape[0])
+            else:
+                result = self._pyTorch_inference(data, unpackedData.shape[0])
+        except Exception as e:
+            print("VC PROCESSING!!!! EXCEPTION!!!", e)
+            print(traceback.format_exc())
+            del self.np_prev_audio1
+            del self.prev_audio1
+
+        result = result.astype(np.int16)
+        # print("on_request result size:",result.shape)
+        return result

diff --git a/server/voice_changer/VoiceChangerManager.py b/server/voice_changer/VoiceChangerManager.py
index 768d5a9b..ad06e287 100644
--- a/server/voice_changer/VoiceChangerManager.py
+++ b/server/voice_changer/VoiceChangerManager.py
@@ -27,23 +27,9 @@ class VoiceChangerManager():
         else:
             return {"no info":"no info"}

-    # def set_onnx_provider(self, provider:str):
-    #     if hasattr(self, 'voiceChanger'):
-    #         return self.voiceChanger.set_onnx_provider(provider)
-    #     else:
-    #         return {"error":"no voice changer"}
-
-
-    def changeVoice(self, gpu:int, srcId:int, dstId:int, timestamp:int, convertChunkNum:int, crossFadeLowerValue:float, crossFadeOffsetRate:float, crossFadeEndRate:float, unpackedData:any):
+    def changeVoice(self, unpackedData:any):
         if hasattr(self, 'voiceChanger') == True:
             return self.voiceChanger.on_request(unpackedData)
         else:
             print("Voice Change is not loaded. Did you load a correct model?")
             return np.zeros(1).astype(np.int16)
-
-    def changeVoice_old(self, gpu, srcId, dstId, timestamp, prefixChunkSize, unpackedData):
-        if hasattr(self, 'voiceChanger') == True:
-            return self.voiceChanger.on_request(gpu, srcId, dstId, timestamp, prefixChunkSize, unpackedData)
-        else:
-            print("Voice Change is not loaded. Did you load a correct model?")
-            return np.zeros(1).astype(np.int16)
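One note on the update_setting hunk in VoiceChanger.py above: when the gpu key changes, the existing onnxruntime session is re-bound to the new device with set_providers rather than by reloading the model. A sketch of that call in isolation (rebind_gpu is an illustrative name; session creation is assumed to have happened elsewhere):

    # Sketch: move a live onnxruntime session to another CUDA device.
    import onnxruntime

    def rebind_gpu(session: onnxruntime.InferenceSession, gpu: int) -> None:
        if gpu >= 0 and "CUDAExecutionProvider" in session.get_providers():
            session.set_providers(
                providers=["CUDAExecutionProvider"],
                provider_options=[{"device_id": gpu}],
            )
        else:
            session.set_providers(providers=["CPUExecutionProvider"])  # CPU fallback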