fix: server device mode channel num

2025-01-23 21:45:00 +03:00 · 2023-05-13 14:30:15 +09:00 · 2023-05-13 14:30:15 +09:00 · 6fecb5f908
commit 6fecb5f908
parent ea3c5b5740
2 changed files with 74 additions and 52 deletions
--- a/server/voice_changer/Local/AudioDeviceList.py
+++ b/server/voice_changer/Local/AudioDeviceList.py
@ -1,10 +1,5 @@
-import pyaudio
+import sounddevice as sd
 # import json
 from dataclasses import dataclass
 from const import ServerAudioDeviceTypes
@ -14,44 +9,42 @@ class ServerAudioDevice:
    index: int = 0
    name: str = ""
    hostAPI: str = ""
    maxInputChannels: int = 0
    maxOutputChannels: int = 0
 def list_audio_device():
    # Lists the host's audio devices and returns a pair
    # (input devices, output devices) of ServerAudioDevice records.
    #
    # This span is a diff hunk: lines starting with "-" belong to the removed
    # pyaudio-based implementation, lines starting with "+" to the added
    # sounddevice-based one.
-    audio = pyaudio.PyAudio()
+    audioDeviceList = sd.query_devices()
    audio_input_devices: list[ServerAudioDevice] = []
    audio_output_devices: list[ServerAudioDevice] = []
    host_apis = []
    # Split the device table by capability: a device with at least one input
    # channel is listed as an input, one with at least one output channel as
    # an output (a device may therefore appear in both lists).
-    for api_index in range(audio.get_host_api_count()):
+    inputAudioDeviceList = [d for d in audioDeviceList if d["max_input_channels"] > 0]
-        host_apis.append(audio.get_host_api_info_by_index(api_index)["name"])
+    outputDeviceList = [d for d in audioDeviceList if d["max_output_channels"] > 0]
    hostapis = sd.query_hostapis()
    # Debug output of the filtered device lists and the host-API table.
-    for x in range(0, audio.get_device_count()):
+    print("input:", inputAudioDeviceList)
-        device = audio.get_device_info_by_index(x)
+    print("output:", outputDeviceList)
-        try:
+    print("hostapis", hostapis)
    # NOTE(review): `device` is assigned only on a removed ("-") line above;
    # the next three lines look like the body of the removed try/except that
    # lost its "-" markers -- confirm against the repository.
            deviceName = device["name"].encode("shift-jis").decode("utf-8")
        except (UnicodeDecodeError, UnicodeEncodeError):
            deviceName = device["name"]
-        deviceIndex = device["index"]
+    serverAudioInputDevices = []
-        hostAPI = host_apis[device["hostApi"]]
+    serverAudioOutputDevices = []
    # Wrap every input-capable device in a ServerAudioDevice. The host-API
    # name is resolved by using the device's "hostapi" value as an index into
    # the table returned by sd.query_hostapis().
    for d in inputAudioDeviceList:
        serverInputAudioDevice: ServerAudioDevice = ServerAudioDevice(
            kind=ServerAudioDeviceTypes.audioinput,
            index=d["index"],
            name=d["name"],
            hostAPI=hostapis[d["hostapi"]]["name"],
            maxInputChannels=d["max_input_channels"],
            maxOutputChannels=d["max_output_channels"],
        )
        serverAudioInputDevices.append(serverInputAudioDevice)
    # Same wrapping for every output-capable device.
    for d in outputDeviceList:
        serverOutputAudioDevice: ServerAudioDevice = ServerAudioDevice(
            kind=ServerAudioDeviceTypes.audiooutput,
            index=d["index"],
            name=d["name"],
            hostAPI=hostapis[d["hostapi"]]["name"],
            maxInputChannels=d["max_input_channels"],
            maxOutputChannels=d["max_output_channels"],
        )
        serverAudioOutputDevices.append(serverOutputAudioDevice)
-        if device["maxInputChannels"] > 0:
+    return serverAudioInputDevices, serverAudioOutputDevices
    # NOTE(review): everything below follows the added `return` and still uses
    # `device`, `deviceIndex`, `deviceName` and `hostAPI`, which are assigned
    # only by the old implementation -- presumably the tail of the removed
    # code shown without "-" markers; confirm against the repository.
            audio_input_devices.append(
                ServerAudioDevice(
                    kind=ServerAudioDeviceTypes.audioinput,
                    index=deviceIndex,
                    name=deviceName,
                    hostAPI=hostAPI,
                )
            )
        if device["maxOutputChannels"] > 0:
            audio_output_devices.append(
                ServerAudioDevice(
                    kind=ServerAudioDeviceTypes.audiooutput,
                    index=deviceIndex,
                    name=deviceName,
                    hostAPI=hostAPI,
                )
            )
    return audio_input_devices, audio_output_devices
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@ -102,17 +102,29 @@ class VoiceChanger:
    def audio_callback(
        self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
    ):
        """Stream callback: convert one input block and write it to ``outdata``.

        The input block is mixed down to mono, scaled by 32768 and passed to
        ``self.on_request``; the returned mono signal is scaled back and
        copied into every column of ``outdata``. The elapsed time plus the
        timings returned by ``on_request`` are stored, in rounded
        milliseconds, in ``self.settings.performance``.

        Args:
            indata: Captured block. It is transposed before the mono
                mix-down, so it is presumably laid out as
                (frames, channels) -- TODO confirm.
            outdata: Buffer to fill; its second dimension is taken as the
                number of output channels.
            frames: Not read here.
            times: Not read here; the name is rebound to the timings returned
                by ``on_request``.
            status: Not read here.

        Any exception is printed instead of being raised. If it occurs before
        ``outdata`` has been written, the buffer is zero-filled so that its
        previous contents are not played back.
        """
        # print(indata)
        wroteOutput = False
        try:
            with Timer("all_inference_time") as t:
                unpackedData = librosa.to_mono(indata.T) * 32768.0
                out_wav, times = self.on_request(unpackedData)
-                outdata[:] = np.repeat(out_wav, 2).reshape(-1, 2) / 32768.0
+                outputChannels = outdata.shape[1]
                # Duplicate every mono sample once per output channel.
                outdata[:] = (
                    np.repeat(out_wav, outputChannels).reshape(-1, outputChannels)
                    / 32768.0
                )
                wroteOutput = True
            all_inference_time = t.secs
            performance = [all_inference_time] + times
            self.settings.performance = [round(x * 1000) for x in performance]
        except Exception as e:
-            print(e)
+            print("[Voice Changer] ex:", e)
            if not wroteOutput:
                # Nothing valid was produced for this block: emit silence.
                outdata.fill(0)
    def getServerAudioDevice(
        self, audioDeviceList: list[ServerAudioDevice], index: int
    ):
        """Return the first device in ``audioDeviceList`` with the given index.

        Args:
            audioDeviceList: Devices to search; each entry is compared by its
                ``index`` attribute.
            index: Device index to look for.

        Returns:
            The first matching entry, or ``None`` when no entry matches.
        """
        for candidate in audioDeviceList:
            if candidate.index == index:
                return candidate
        return None
    def serverLocal(self, _vc):
        """Drive server-side audio I/O through sounddevice.

        Only the parts of this method that fall inside the surrounding diff
        hunks are visible here. In them, sounddevice is re-initialised, the
        default input and output devices are taken from ``vc.settings``, the
        matching ``ServerAudioDevice`` entries are looked up to obtain the
        channel counts, and an ``sd.Stream`` is opened with
        ``self.audio_callback`` as its callback.

        Args:
            _vc: The ``VoiceChanger`` whose ``settings`` are read and updated.
        """
        vc: VoiceChanger = _vc
@ -131,12 +143,28 @@ class VoiceChanger:
            else:
                sd._terminate()
                sd._initialize()
-                if currentInputDeviceId != vc.settings.serverInputDeviceId:
+
-                    sd.default.device[0] = vc.settings.serverInputDeviceId
+                sd.default.device[0] = vc.settings.serverInputDeviceId
-                    currentInputDeviceId = vc.settings.serverInputDeviceId
+                currentInputDeviceId = vc.settings.serverInputDeviceId
-                if currentOutputDeviceId != vc.settings.serverOutputDeviceId:
+                sd.default.device[1] = vc.settings.serverOutputDeviceId
-                    sd.default.device[1] = vc.settings.serverOutputDeviceId
+                currentOutputDeviceId = vc.settings.serverOutputDeviceId
-                    currentOutputDeviceId = vc.settings.serverOutputDeviceId
+
                # NOTE(review): this stores the device list, not a channel
                # count; the name is reassigned below before it is used, so
                # this looks like a leftover -- confirm and remove.
                currentInputChannelNum = vc.settings.serverAudioInputDevices
                # Resolve the selected device ids to their device records.
                serverInputAudioDevice = self.getServerAudioDevice(
                    vc.settings.serverAudioInputDevices, currentInputDeviceId
                )
                serverOutputAudioDevice = self.getServerAudioDevice(
                    vc.settings.serverAudioOutputDevices, currentOutputDeviceId
                )
                print(serverInputAudioDevice, serverOutputAudioDevice)
                # Without both device records no stream can be opened; wait
                # and retry on the next pass.
                if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                    time.sleep(2)
                    print("serverInputAudioDevice or serverOutputAudioDevice is None")
                    continue

                # Each channel count comes from its own device record; the
                # stream below is opened with channels=[input, output].
                currentInputChannelNum = serverInputAudioDevice.maxInputChannels
                currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels

                vc.settings.serverInputAudioSampleRate = (
                    self.voiceChanger.get_processing_sampling_rate()
@ -144,14 +172,16 @@ class VoiceChanger:
                currentInputSampleRate = vc.settings.serverInputAudioSampleRate
                currentInputChunkNum = vc.settings.serverReadChunkSize
                # Block size in frames: the configured chunk count times 128.
                block_frame = currentInputChunkNum * 128
                try:
                    with sd.Stream(
                        callback=self.audio_callback,
                        blocksize=block_frame,
                        samplerate=currentInputSampleRate,
                        dtype="float32",
-                        channels=1,
+                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        print()
                        while (
                            vc.settings.serverAudioStated == 1
                            and currentInputDeviceId == vc.settings.serverInputDeviceId
@ -182,8 +212,7 @@ class VoiceChanger:
                            )
                except Exception as e:
-                    print(e)
+                    print("[Voice Changer] ex:", e)
                    print()
                    time.sleep(2)
    def __init__(self, params: VoiceChangerParams):