mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00
WIP:refactoring
This commit is contained in:
parent
be42bb682d
commit
9806ce2f3d
@@ -15,7 +15,11 @@ class ServerAudioDevice:


def list_audio_device():
    try:
        audioDeviceList = sd.query_devices()
    except Exception as e:
        print("[Voice Changer] ex: query_devices", e)
        return [], []

    inputAudioDeviceList = [d for d in audioDeviceList if d["max_input_channels"] > 0]
    outputAudioDeviceList = [d for d in audioDeviceList if d["max_output_channels"] > 0]
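For context, the filtering above works on the device table that sounddevice exposes; each entry reports max_input_channels / max_output_channels, which is what separates capture devices from playback devices. A standalone sketch of the same split (the hunk does not show the function's return path, so this stops at the two filtered lists):

    import sounddevice as sd  # same dependency as the module above

    devices = sd.query_devices()  # list-like of dicts: name, index, channel counts, default_samplerate
    inputs = [d for d in devices if d["max_input_channels"] > 0]    # capture-capable devices
    outputs = [d for d in devices if d["max_output_channels"] > 0]  # playback-capable devices
    print(len(inputs), len(outputs))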
@@ -4,34 +4,66 @@ import numpy as np
import librosa
import sounddevice as sd

from dataclasses import dataclass, asdict, field

from voice_changer.Local.AudioDeviceList import ServerAudioDevice
from voice_changer.VoiceChanger import VoiceChanger
from voice_changer.VoiceChangerManager import VoiceChangerManager
from voice_changer.utils.Timer import Timer


+@dataclass()
+class ServerDeviceSettings:
+    enableServerAudio: int = 0  # 0:off, 1:on
+    serverAudioStated: int = 0  # 0:off, 1:on
+    serverInputAudioSampleRate: int = 44100
+    serverOutputAudioSampleRate: int = 44100
+    serverInputDeviceId: int = -1
+    serverOutputDeviceId: int = -1
+    serverReadChunkSize: int = 256
+    serverInputAudioGain: float = 1.0
+    serverOutputAudioGain: float = 1.0


+EditableServerDeviceSettings = {
+    "intData": [
+        "enableServerAudio",
+        "serverAudioStated",
+        "serverInputAudioSampleRate",
+        "serverOutputAudioSampleRate",
+        "serverInputDeviceId",
+        "serverOutputDeviceId",
+        "serverReadChunkSize",
+    ],
+    "floatData": [
+        "serverInputAudioGain",
+        "serverOutputAudioGain",
+    ],
+}


class ServerDevice:
-    def __init__(self):
-        self.voiceChanger: VoiceChanger | None = None
-        pass
+    def __init__(self, voiceChangerManager: VoiceChangerManager):
+        self.settings = ServerDeviceSettings()
+        self.voiceChangerManager: VoiceChangerManager = voiceChangerManager

    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
-        if self.voiceChanger is None:
+        if self.voiceChangerManager.voiceChanger is None:
            print("[Voice Changer] voiceChanger is None")
            return

        try:
-            indata = indata * self.voiceChanger.settings.serverInputAudioGain
+            indata = indata * self.settings.serverInputAudioGain
            with Timer("all_inference_time") as t:
                unpackedData = librosa.to_mono(indata.T) * 32768.0
-                out_wav, times = self.voiceChanger.on_request(unpackedData)
+                out_wav, times = self.voiceChangerManager.voiceChanger.on_request(unpackedData)
                outputChunnels = outdata.shape[1]
                outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
-                outdata[:] = outdata * self.voiceChanger.settings.serverOutputAudioGain
+                outdata[:] = outdata * self.settings.serverOutputAudioGain
            all_inference_time = t.secs
            performance = [all_inference_time] + times
-            if self.voiceChanger.emitTo is not None:
-                self.voiceChanger.emitTo(performance)
-            self.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
+            if self.voiceChangerManager.voiceChanger.emitTo is not None:
+                self.voiceChangerManager.voiceChanger.emitTo(performance)
+            self.voiceChangerManager.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
        except Exception as e:
            print("[Voice Changer] ex:", e)
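The callback above mixes the float32 input block down to mono and scales it to the int16 range before it reaches the model, then repeats the mono result across the stream's output channels and scales it back. A self-contained sketch of just that conversion, with a pass-through standing in for the on_request() call:

    import numpy as np
    import librosa

    def convert_block(indata: np.ndarray, out_channels: int) -> np.ndarray:
        # (frames, channels) float32  ->  mono, scaled to the int16 range
        mono = librosa.to_mono(indata.T) * 32768.0
        out_wav = mono  # the real callback routes this through on_request()
        # mono -> (frames, out_channels), back to the float32 range
        return np.repeat(out_wav, out_channels).reshape(-1, out_channels) / 32768.0

    block = np.zeros((256, 2), dtype=np.float32)  # dummy stereo input block
    print(convert_block(block, 2).shape)          # (256, 2)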
@@ -42,31 +74,26 @@ class ServerDevice:
        else:
            return None

-    def serverLocal(self, _vc: VoiceChanger):
-        self.voiceChanger = _vc
-        vc = self.voiceChanger
+    def serverLocal(self):
        currentInputDeviceId = -1
        currentModelSamplingRate = -1
        currentOutputDeviceId = -1
        currentInputChunkNum = -1
        while True:
-            if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc is None:
-                vc.settings.inputSampleRate = 48000
+            if self.settings.serverAudioStated == 0 or self.settings.serverInputDeviceId == -1 or self.voiceChangerManager is None:
+                self.voiceChangerManager.voiceChanger.settings.inputSampleRate = 48000
                time.sleep(2)
            else:
                sd._terminate()
                sd._initialize()

-                sd.default.device[0] = vc.settings.serverInputDeviceId
-                currentInputDeviceId = vc.settings.serverInputDeviceId
-                sd.default.device[1] = vc.settings.serverOutputDeviceId
-                currentOutputDeviceId = vc.settings.serverOutputDeviceId
+                sd.default.device[0] = self.settings.serverInputDeviceId
+                currentInputDeviceId = self.settings.serverInputDeviceId
+                sd.default.device[1] = self.settings.serverOutputDeviceId
+                currentOutputDeviceId = self.settings.serverOutputDeviceId

-                currentInputChannelNum = vc.settings.serverAudioInputDevices

-                serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
-                serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
+                serverInputAudioDevice = self.getServerAudioDevice(self.voiceChangerManager.serverAudioInputDevices, currentInputDeviceId)
+                serverOutputAudioDevice = self.getServerAudioDevice(self.voiceChangerManager.serverAudioOutputDevices, currentOutputDeviceId)
                print(serverInputAudioDevice, serverOutputAudioDevice)
                if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                    time.sleep(2)
@@ -76,12 +103,12 @@ class ServerDevice:
                currentInputChannelNum = serverInputAudioDevice.maxInputChannels
                currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels

-                currentInputChunkNum = vc.settings.serverReadChunkSize
+                currentInputChunkNum = self.settings.serverReadChunkSize
                block_frame = currentInputChunkNum * 128

                # sample rate precheck(alsa cannot use 40000?)
                try:
-                    currentModelSamplingRate = self.voiceChanger.voiceChangerModel.get_processing_sampling_rate()
+                    currentModelSamplingRate = self.voiceChangerManager.voiceChanger.voiceChangerModel.get_processing_sampling_rate()
                except Exception as e:
                    print("[Voice Changer] ex: get_processing_sampling_rate", e)
                    continue
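For orientation, serverReadChunkSize is counted in 128-frame units, so each callback block is serverReadChunkSize * 128 frames; together with the sample rate this fixes how much audio one block covers. A quick worked example using the default shown above (the 48 kHz figure is only an illustrative model rate):

    serverReadChunkSize = 256                  # default from ServerDeviceSettings above
    block_frame = serverReadChunkSize * 128    # 32768 frames per callback
    samplerate = 48000                         # illustrative model sample rate
    print(block_frame / samplerate)            # ~0.68 s of audio per block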
@@ -94,38 +121,38 @@ class ServerDevice:
                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        pass
-                    vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
-                    vc.settings.inputSampleRate = currentModelSamplingRate
-                    print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
+                    self.settings.serverInputAudioSampleRate = currentModelSamplingRate
+                    self.voiceChangerManager.voiceChanger.settings.inputSampleRate = currentModelSamplingRate
+                    print(f"[Voice Changer] sample rate {self.settings.serverInputAudioSampleRate}")
                except Exception as e:
                    print(
                        "[Voice Changer] ex: fallback to device default samplerate",
                        e,
                    )
-                    vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
-                    vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
+                    self.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
+                    self.voiceChangerManager.voiceChanger.settings.inputSampleRate = self.settings.serverInputAudioSampleRate

                # main loop
                try:
                    with sd.Stream(
                        callback=self.audio_callback,
                        blocksize=block_frame,
-                        samplerate=vc.settings.serverInputAudioSampleRate,
+                        samplerate=self.settings.serverInputAudioSampleRate,
                        dtype="float32",
                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
-                        while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.voiceChangerModel.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
+                        while self.settings.serverAudioStated == 1 and currentInputDeviceId == self.settings.serverInputDeviceId and currentOutputDeviceId == self.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChangerManager.voiceChanger.voiceChangerModel.get_processing_sampling_rate() and currentInputChunkNum == self.settings.serverReadChunkSize:
                            time.sleep(2)
                            print(
                                "[Voice Changer] server audio",
-                                vc.settings.performance,
+                                self.voiceChangerManager.settings.performance,
                            )
                            print(
                                "[Voice Changer] info:",
-                                vc.settings.serverAudioStated,
+                                self.settings.serverAudioStated,
                                currentInputDeviceId,
                                currentOutputDeviceId,
-                                vc.settings.serverInputAudioSampleRate,
+                                self.settings.serverInputAudioSampleRate,
                                currentInputChunkNum,
                            )
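The inner while above acts as a restart condition: the stream stays open only while none of the watched settings change, and any change falls out of the with sd.Stream(...) block so that the outer while True rebuilds the stream with the new device, chunk size and sample rate. The same pattern in isolation, as a sketch with illustrative names rather than the project's API:

    import time

    def run_until_changed(open_stream, snapshot, current):
        # open_stream(): context manager for an audio stream
        # snapshot():    settings the stream was built with
        # current():     live settings, re-read on every poll
        while True:                        # runs forever, like serverLocal's outer loop
            baseline = snapshot()
            with open_stream():            # rebuild the stream with fresh settings
                while current() == baseline:
                    time.sleep(2)          # leaving this loop closes the stream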
@@ -51,22 +51,7 @@ class VoiceChangerSettings:
    crossFadeOverlapSize: int = 4096

    recordIO: int = 0  # 0:off, 1:on
    serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
    serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])

-    enableServerAudio: int = 0  # 0:off, 1:on
-    serverAudioStated: int = 0  # 0:off, 1:on
-    # serverInputAudioSampleRate: int = 48000
-    # serverOutputAudioSampleRate: int = 48000
-    serverInputAudioSampleRate: int = 44100
-    serverOutputAudioSampleRate: int = 44100
-    # serverInputAudioBufferSize: int = 1024 * 24
-    # serverOutputAudioBufferSize: int = 1024 * 24
-    serverInputDeviceId: int = -1
-    serverOutputDeviceId: int = -1
-    serverReadChunkSize: int = 256
-    serverInputAudioGain: float = 1.0
-    serverOutputAudioGain: float = 1.0
    performance: list[int] = field(default_factory=lambda: [0, 0, 0, 0])

    # ↓ list only the mutable fields

@@ -75,23 +60,12 @@ class VoiceChangerSettings:
            "inputSampleRate",
            "crossFadeOverlapSize",
            "recordIO",
-            "enableServerAudio",
-            "serverAudioStated",
-            "serverInputAudioSampleRate",
-            "serverOutputAudioSampleRate",
-            # "serverInputAudioBufferSize",
-            # "serverOutputAudioBufferSize",
-            "serverInputDeviceId",
-            "serverOutputDeviceId",
-            "serverReadChunkSize",
        ]
    )
    floatData: list[str] = field(
        default_factory=lambda: [
            "crossFadeOffsetRate",
            "crossFadeEndRate",
-            "serverInputAudioGain",
-            "serverOutputAudioGain",
        ]
    )
    strData: list[str] = field(default_factory=lambda: [])
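These intData / floatData lists, like the EditableServerDeviceSettings dict added earlier in this diff, are the usual inputs to a generic setter that casts incoming values before assigning them. The helper below is hypothetical (this commit does not add such a method); it only illustrates how the lists are typically consumed:

    def update_setting(settings, editable: dict, key: str, val) -> bool:
        # Hypothetical helper: cast according to the list the key appears in, then assign.
        if key in editable["intData"]:
            setattr(settings, key, int(val))
        elif key in editable["floatData"]:
            setattr(settings, key, float(val))
        else:
            return False  # unknown or non-editable key
        return True

    # e.g. update_setting(server_device.settings, EditableServerDeviceSettings, "serverReadChunkSize", "384")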
@@ -108,120 +82,6 @@ class VoiceChanger:

-    # emitTo = None

-    # def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
-    #     try:
-    #         indata = indata * self.settings.serverInputAudioGain
-    #         with Timer("all_inference_time") as t:
-    #             unpackedData = librosa.to_mono(indata.T) * 32768.0
-    #             out_wav, times = self.on_request(unpackedData)
-    #             outputChunnels = outdata.shape[1]
-    #             outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
-    #             outdata[:] = outdata * self.settings.serverOutputAudioGain
-    #         all_inference_time = t.secs
-    #         performance = [all_inference_time] + times
-    #         if self.emitTo is not None:
-    #             self.emitTo(performance)
-    #         self.settings.performance = [round(x * 1000) for x in performance]
-    #     except Exception as e:
-    #         print("[Voice Changer] ex:", e)

-    # def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
-    #     serverAudioDevice = [x for x in audioDeviceList if x.index == index]
-    #     if len(serverAudioDevice) > 0:
-    #         return serverAudioDevice[0]
-    #     else:
-    #         return None

-    # def serverLocal(self, _vc):
-    #     vc: VoiceChanger = _vc

-    #     currentInputDeviceId = -1
-    #     currentModelSamplingRate = -1
-    #     currentOutputDeviceId = -1
-    #     currentInputChunkNum = -1
-    #     while True:
-    #         if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
-    #             vc.settings.inputSampleRate = 48000
-    #             time.sleep(2)
-    #         else:
-    #             sd._terminate()
-    #             sd._initialize()

-    #             sd.default.device[0] = vc.settings.serverInputDeviceId
-    #             currentInputDeviceId = vc.settings.serverInputDeviceId
-    #             sd.default.device[1] = vc.settings.serverOutputDeviceId
-    #             currentOutputDeviceId = vc.settings.serverOutputDeviceId

-    #             currentInputChannelNum = vc.settings.serverAudioInputDevices

-    #             serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
-    #             serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
-    #             print(serverInputAudioDevice, serverOutputAudioDevice)
-    #             if serverInputAudioDevice is None or serverOutputAudioDevice is None:
-    #                 time.sleep(2)
-    #                 print("serverInputAudioDevice or serverOutputAudioDevice is None")
-    #                 continue

-    #             currentInputChannelNum = serverInputAudioDevice.maxInputChannels
-    #             currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels

-    #             currentInputChunkNum = vc.settings.serverReadChunkSize
-    #             block_frame = currentInputChunkNum * 128

-    #             # sample rate precheck(alsa cannot use 40000?)
-    #             try:
-    #                 currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
-    #             except Exception as e:
-    #                 print("[Voice Changer] ex: get_processing_sampling_rate", e)
-    #                 continue
-    #             try:
-    #                 with sd.Stream(
-    #                     callback=self.audio_callback,
-    #                     blocksize=block_frame,
-    #                     samplerate=currentModelSamplingRate,
-    #                     dtype="float32",
-    #                     channels=[currentInputChannelNum, currentOutputChannelNum],
-    #                 ):
-    #                     pass
-    #                 vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
-    #                 vc.settings.inputSampleRate = currentModelSamplingRate
-    #                 print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
-    #             except Exception as e:
-    #                 print(
-    #                     "[Voice Changer] ex: fallback to device default samplerate",
-    #                     e,
-    #                 )
-    #                 vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
-    #                 vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate

-    #             # main loop
-    #             try:
-    #                 with sd.Stream(
-    #                     callback=self.audio_callback,
-    #                     blocksize=block_frame,
-    #                     samplerate=vc.settings.serverInputAudioSampleRate,
-    #                     dtype="float32",
-    #                     channels=[currentInputChannelNum, currentOutputChannelNum],
-    #                 ):
-    #                     while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
-    #                         time.sleep(2)
-    #                         print(
-    #                             "[Voice Changer] server audio",
-    #                             self.settings.performance,
-    #                         )
-    #                         print(
-    #                             "[Voice Changer] info:",
-    #                             vc.settings.serverAudioStated,
-    #                             currentInputDeviceId,
-    #                             currentOutputDeviceId,
-    #                             vc.settings.serverInputAudioSampleRate,
-    #                             currentInputChunkNum,
-    #                         )

-    #             except Exception as e:
-    #                 print("[Voice Changer] ex:", e)
-    #                 time.sleep(2)

    def __init__(self, params: VoiceChangerParams, slotIndex: int):
        # initialization
        self.settings = VoiceChangerSettings()
@@ -238,9 +98,9 @@ class VoiceChanger:
        self.ioRecorder: IORecorder | None = None
        self.sola_buffer: AudioInOut | None = None

-        audioinput, audiooutput = list_audio_device()
-        self.settings.serverAudioInputDevices = audioinput
-        self.settings.serverAudioOutputDevices = audiooutput
+        # audioinput, audiooutput = list_audio_device()
+        # self.settings.serverAudioInputDevices = audioinput
+        # self.settings.serverAudioOutputDevices = audiooutput

        self.slotIndex = slotIndex
        self.slotInfo = loadSlotInfo(params.model_dir, self.slotIndex)
@@ -3,6 +3,7 @@ import threading
from data.ModelSample import ModelSamples
from data.ModelSlot import ModelSlots, loadSlotInfo
from utils.downloader.SampleDownloader import downloadSample, getSampleInfos
+from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
from voice_changer.Local.ServerDevice import ServerDevice
from voice_changer.RVC.ModelSlotGenerator import setSlotAsRVC

@@ -26,6 +27,7 @@ class GPUInfo:
@dataclass()
class VoiceChangerManagerSettings:
    slotIndex: int

    intData: list[str] = field(default_factory=lambda: ["slotIndex"])

@@ -43,6 +45,14 @@ class VoiceChangerManager(object):
        self.sampleModels: list[ModelSamples] = getSampleInfos(self.params.sample_mode)
        self.gpus: list[GPUInfo] = self._get_gpuInfos()

+        audioinput, audiooutput = list_audio_device()
+        self.serverAudioInputDevices: list[ServerAudioDevice] = audioinput
+        self.serverAudioOutputDevices: list[ServerAudioDevice] = audiooutput

+        # ServerDevice
+        thread = threading.Thread(target=self.serverDevice.serverLocal, args=(self,))
+        thread.start()

    def _get_gpuInfos(self):
        devCount = torch.cuda.device_count()
        gpus = []
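With this hunk the manager owns the server-audio loop: it enumerates the devices once and starts ServerDevice.serverLocal on a background thread from its own constructor, while the older start site shown in the next hunk goes away. A minimal sketch of the pattern in isolation; the daemon=True flag is an assumption for the sketch, not something this commit sets:

    import threading
    import time

    def server_loop(manager):
        # stand-in for ServerDevice.serverLocal: poll settings, (re)open streams
        while True:
            time.sleep(2)

    class Manager:
        def __init__(self):
            # daemon=True keeps the background loop from blocking interpreter shutdown
            self._thread = threading.Thread(target=server_loop, args=(self,), daemon=True)
            self._thread.start()

    m = Manager()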
@@ -63,8 +73,6 @@ class VoiceChangerManager(object):
            print(f"VoiceChanger Initialized (GPU_NUM:{gpu_num}, mps_enabled:{mps_enabled})")

            cls._instance.voiceChanger = VoiceChanger(params, cls._instance.settings.slotIndex)
-            thread = threading.Thread(target=cls._instance.serverDevice.serverLocal, args=(cls._instance.voiceChanger,))
-            thread.start()
            cls._instance.voiceChanger.prepareModel()
        return cls._instance
@@ -94,6 +102,8 @@ class VoiceChangerManager(object):
        data["slotInfos"] = slotInfos
        data["gpus"] = self.gpus
        data["sampleModels"] = self.sampleModels
+        data["serverAudioInputDevices"] = self.serverAudioInputDevices
+        data["serverAudioOutputDevices"] = self.serverAudioOutputDevices

        data["status"] = "OK"
        if hasattr(self, "voiceChanger"):