From 08b3f25f0b4591105377925ba1a7dff68a67c964 Mon Sep 17 00:00:00 2001
From: w-okada <you@example.com>
Date: Thu, 23 Nov 2023 07:53:14 +0900
Subject: [PATCH] Improve Device Detection

---
 .../components/demo/001_GuiStateProvider.tsx  | 91 +++++++++++++++----
 .../demo/components2/102-3_DeviceArea.tsx     | 16 +++-
 server/voice_changer/Local/AudioDeviceList.py | 13 ++-
 server/voice_changer/Local/ServerDevice.py    | 78 +++-------------
 4 files changed, 109 insertions(+), 89 deletions(-)
diff --git a/client/demo/src/components/demo/001_GuiStateProvider.tsx b/client/demo/src/components/demo/001_GuiStateProvider.tsx
index 49ab6425..0f56eea2 100644
--- a/client/demo/src/components/demo/001_GuiStateProvider.tsx
+++ b/client/demo/src/components/demo/001_GuiStateProvider.tsx
@@ -1,4 +1,4 @@
-import React, { useContext, useEffect, useState } from "react";
+import React, { useContext, useEffect, useState, useRef } from "react";
 import { ReactNode } from "react";
 import { useAppRoot } from "../../001_provider/001_AppRootProvider";
 import { StateControlCheckbox, useStateControlCheckbox } from "../../hooks/useStateControlCheckbox";
@@ -62,6 +62,7 @@ type GuiStateAndMethod = {
     setIsAnalyzing: (val: boolean) => void;
     setShowPyTorchModelUpload: (val: boolean) => void;
 
+    reloadDeviceInfo: () => Promise<void>;
     inputAudioDeviceInfo: MediaDeviceInfo[];
     outputAudioDeviceInfo: MediaDeviceInfo[];
     audioInputForGUI: string;
@@ -128,14 +129,20 @@ export const GuiStateProvider = ({ children }: Props) => {
     const [beatriceJVSSpeakerId, setBeatriceJVSSpeakerId] = useState<number>(1);
     const [beatriceJVSSpeakerPitch, setBeatriceJVSSpeakerPitch] = useState<number>(0);
 
-    const reloadDeviceInfo = async () => {
-        try {
-            const ms = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
-            ms.getTracks().forEach((x) => {
-                x.stop();
-            });
-        } catch (e) {
-            console.warn("Enumerate device error::", e);
+    const checkDeviceAvailable = useRef<boolean>(false);
+
+    const _reloadDeviceInfo = async () => {
+        // Dry-run getUserMedia request before enumerating devices; skipped once it has succeeded.
+        if (checkDeviceAvailable.current == false) {
+            try {
+                const ms = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
+                ms.getTracks().forEach((x) => {
+                    x.stop();
+                });
+                checkDeviceAvailable.current = true;
+            } catch (e) {
+                console.warn("Enumerate device error::", e);
+            }
         }
         const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();
 
@@ -182,14 +189,66 @@ export const GuiStateProvider = ({ children }: Props) => {
         // })
         return [audioInputs, audioOutputs];
     };
+
+    // Re-enumerates audio devices and publishes both lists to state (wired to the GUI "reload" buttons).
+    const reloadDeviceInfo = async () => {
+        const audioInfo = await _reloadDeviceInfo();
+        setInputAudioDeviceInfo(audioInfo[0]);
+        setOutputAudioDeviceInfo(audioInfo[1]);
+    };
+
     useEffect(() => {
-        const audioInitialize = async () => {
-            const audioInfo = await reloadDeviceInfo();
-            setInputAudioDeviceInfo(audioInfo[0]);
-            setOutputAudioDeviceInfo(audioInfo[1]);
+        let isMounted = true;
+        let timerId: ReturnType<typeof setTimeout> | undefined;
+
+        // Returns the device ids that were removed from / added to the known set.
+        const checkDeviceDiff = (knownDeviceIds: Set<string>, newDeviceIds: Set<string>) => {
+            const deleted = new Set([...knownDeviceIds].filter((x) => !newDeviceIds.has(x)));
+            const added = new Set([...newDeviceIds].filter((x) => !knownDeviceIds.has(x)));
+            return { deleted, added };
+        };
+
+        // Polls the device list, updates state only when the id set changed, then re-arms itself.
+        const pollDevices = async () => {
+            try {
+                const audioInfo = await _reloadDeviceInfo();
+                // Cleanup may have run while awaiting; a stale run must not touch state.
+                if (!isMounted) return;
+
+                const knownAudioinputIds = new Set(inputAudioDeviceInfo.map((x) => x.deviceId));
+                const newAudioinputIds = new Set(audioInfo[0].map((x) => x.deviceId));
+                const knownAudiooutputIds = new Set(outputAudioDeviceInfo.map((x) => x.deviceId));
+                const newAudiooutputIds = new Set(audioInfo[1].map((x) => x.deviceId));
+                const audioInputDiff = checkDeviceDiff(knownAudioinputIds, newAudioinputIds);
+                const audioOutputDiff = checkDeviceDiff(knownAudiooutputIds, newAudiooutputIds);
+
+                if (audioInputDiff.deleted.size > 0 || audioInputDiff.added.size > 0) {
+                    console.log(`deleted input device: ${[...audioInputDiff.deleted]}`);
+                    console.log(`added input device: ${[...audioInputDiff.added]}`);
+                    setInputAudioDeviceInfo(audioInfo[0]);
+                }
+                if (audioOutputDiff.deleted.size > 0 || audioOutputDiff.added.size > 0) {
+                    console.log(`deleted output device: ${[...audioOutputDiff.deleted]}`);
+                    console.log(`added output device: ${[...audioOutputDiff.added]}`);
+                    setOutputAudioDeviceInfo(audioInfo[1]);
+                }
+            } catch (err) {
+                console.error("An error occurred during enumeration of devices:", err);
+            } finally {
+                // Re-arm even after a failed poll, but never once the effect has been cleaned up.
+                if (isMounted) {
+                    timerId = setTimeout(pollDevices, 1000 * 3);
+                }
+            }
         };
-        audioInitialize();
-    }, []);
+
+        pollDevices();
+        return () => {
+            isMounted = false;
+            // Cancel the pending poll so a stale closure cannot run after cleanup.
+            clearTimeout(timerId);
+        };
+    }, [inputAudioDeviceInfo, outputAudioDeviceInfo]);
 
     // (1) Controller Switch
     const openServerControlCheckbox = useStateControlCheckbox(OpenServerControlCheckbox);
@@ -271,7 +330,7 @@ export const GuiStateProvider = ({ children }: Props) => {
         serverSetting.updateServerSettings({ ...serverSetting.serverSetting, dstId: dstId });
     }, [beatriceJVSSpeakerId, beatriceJVSSpeakerPitch]);
 
-    const providerValue = {
+    const providerValue: GuiStateAndMethod = {
         stateControls: {
             openServerControlCheckbox,
             openModelSettingCheckbox,
diff --git a/client/demo/src/components/demo/components2/102-3_DeviceArea.tsx b/client/demo/src/components/demo/components2/102-3_DeviceArea.tsx
index 59a654fd..d4750a48 100644
--- a/client/demo/src/components/demo/components2/102-3_DeviceArea.tsx
+++ b/client/demo/src/components/demo/components2/102-3_DeviceArea.tsx
@@ -10,7 +10,7 @@ export type DeviceAreaProps = {};
 
 export const DeviceArea = (_props: DeviceAreaProps) => {
     const { setting, serverSetting, audioContext, setAudioOutputElementId, setAudioMonitorElementId, initializedRef, setVoiceChangerClientSetting, startOutputRecording, stopOutputRecording } = useAppState();
-    const { isConverting, audioInputForGUI, inputAudioDeviceInfo, setAudioInputForGUI, fileInputEchoback, setFileInputEchoback, setAudioOutputForGUI, setAudioMonitorForGUI, audioOutputForGUI, audioMonitorForGUI, outputAudioDeviceInfo, shareScreenEnabled, setShareScreenEnabled } = useGuiState();
+    const { isConverting, audioInputForGUI, inputAudioDeviceInfo, setAudioInputForGUI, fileInputEchoback, setFileInputEchoback, setAudioOutputForGUI, setAudioMonitorForGUI, audioOutputForGUI, audioMonitorForGUI, outputAudioDeviceInfo, shareScreenEnabled, setShareScreenEnabled, reloadDeviceInfo } = useGuiState();
     const [inputHostApi, setInputHostApi] = useState<string>("ALL");
     const [outputHostApi, setOutputHostApi] = useState<string>("ALL");
     const [monitorHostApi, setMonitorHostApi] = useState<string>("ALL");
@@ -29,7 +29,13 @@ export const DeviceArea = (_props: DeviceAreaProps) => {
             return (
                 <div className="config-sub-area-control">
                     <div className="config-sub-area-control-title">AUDIO:</div>
-                    <div className="config-sub-area-control-field"></div>
+                    <div className="config-sub-area-control-field">
+                        <div className="config-sub-area-buttons">
+                            <div onClick={reloadDeviceInfo} className="config-sub-area-button">
+                                reload
+                            </div>
+                        </div>
+                    </div>
                 </div>
             );
         }
@@ -75,6 +81,12 @@ export const DeviceArea = (_props: DeviceAreaProps) => {
                             />
                             <label htmlFor="server-device">server</label>
                         </div>
+
+                        <div className="config-sub-area-buttons">
+                            <div onClick={reloadDeviceInfo} className="config-sub-area-button">
+                                reload
+                            </div>
+                        </div>
                     </div>
                 </div>
             </div>
diff --git a/server/voice_changer/Local/AudioDeviceList.py b/server/voice_changer/Local/AudioDeviceList.py
index ea4bcb30..d467498d 100644
--- a/server/voice_changer/Local/AudioDeviceList.py
+++ b/server/voice_changer/Local/AudioDeviceList.py
@@ -5,6 +5,7 @@ import numpy as np
 
 from const import ServerAudioDeviceType
 from mods.log_control import VoiceChangaerLogger
+
 # from const import SERVER_DEVICE_SAMPLE_RATES
 
 logger = VoiceChangaerLogger.get_instance().getLogger()
@@ -26,18 +27,20 @@ def dummy_callback(data: np.ndarray, frames, times, status):
     pass
 
 
-def checkSamplingRate(deviceId: int, desiredSamplingRate: int, type: ServerAudioDeviceType):
+def checkSamplingRate(
+    deviceId: int, desiredSamplingRate: int, type: ServerAudioDeviceType
+):
     if type == "input":
         try:
             with sd.InputStream(
                 device=deviceId,
                 callback=dummy_callback,
                 dtype="float32",
-                samplerate=desiredSamplingRate
+                samplerate=desiredSamplingRate,
             ):
                 pass
             return True
-        except Exception as e: # NOQA
+        except Exception as e:  # NOQA
             # print("[checkSamplingRate]", e)
             return False
     else:
@@ -46,11 +49,11 @@ def checkSamplingRate(deviceId: int, desiredSamplingRate: int, type: ServerAudio
                 device=deviceId,
                 callback=dummy_callback,
                 dtype="float32",
-                samplerate=desiredSamplingRate
+                samplerate=desiredSamplingRate,
             ):
                 pass
             return True
-        except Exception as e: # NOQA
+        except Exception as e:  # NOQA
             # print("[checkSamplingRate]", e)
             return False
 
diff --git a/server/voice_changer/Local/ServerDevice.py b/server/voice_changer/Local/ServerDevice.py
index 1d907576..1f46409c 100644
--- a/server/voice_changer/Local/ServerDevice.py
+++ b/server/voice_changer/Local/ServerDevice.py
@@ -16,6 +16,7 @@ from voice_changer.utils.VoiceChangerModel import AudioInOut
 from typing import Protocol
 from typing import Union
 from typing import Literal, TypeAlias
+
 AudioDeviceKind: TypeAlias = Literal["input", "output"]
 
 logger = VoiceChangaerLogger.get_instance().getLogger()
@@ -69,9 +70,7 @@ EditableServerDeviceSettings = {
         "serverOutputAudioGain",
         "serverMonitorAudioGain",
     ],
-    "boolData": [
-        "exclusiveMode"
-    ]
+    "boolData": ["exclusiveMode"],
 }
 
 
@@ -233,24 +232,8 @@ class ServerDevice:
             return False
 
     def runNoMonitorSeparate(self, block_frame: int, inputMaxChannel: int, outputMaxChannel: int, inputExtraSetting, outputExtraSetting):
-        with sd.InputStream(
-            callback=self.audioInput_callback_outQueue,
-            dtype="float32",
-            device=self.settings.serverInputDeviceId,
-            blocksize=block_frame,
-            samplerate=self.settings.serverInputAudioSampleRate,
-            channels=inputMaxChannel,
-            extra_settings=inputExtraSetting
-        ):
-            with sd.OutputStream(
-                callback=self.audioOutput_callback,
-                dtype="float32",
-                device=self.settings.serverOutputDeviceId,
-                blocksize=block_frame,
-                samplerate=self.settings.serverOutputAudioSampleRate,
-                channels=outputMaxChannel,
-                extra_settings=outputExtraSetting
-            ):
+        with sd.InputStream(callback=self.audioInput_callback_outQueue, dtype="float32", device=self.settings.serverInputDeviceId, blocksize=block_frame, samplerate=self.settings.serverInputAudioSampleRate, channels=inputMaxChannel, extra_settings=inputExtraSetting):
+            with sd.OutputStream(callback=self.audioOutput_callback, dtype="float32", device=self.settings.serverOutputDeviceId, blocksize=block_frame, samplerate=self.settings.serverOutputAudioSampleRate, channels=outputMaxChannel, extra_settings=outputExtraSetting):
                 while True:
                     changed = self.checkSettingChanged()
                     if changed:
@@ -263,24 +246,8 @@ class ServerDevice:
                     # print(f"                monitor: id:{self.settings.serverMonitorDeviceId}, sr:{self.settings.serverMonitorAudioSampleRate}, ch:{self.serverMonitorAudioDevice.maxOutputChannels}")
 
     def runWithMonitorStandard(self, block_frame: int, inputMaxChannel: int, outputMaxChannel: int, monitorMaxChannel: int, inputExtraSetting, outputExtraSetting, monitorExtraSetting):
-        with sd.Stream(
-            callback=self.audio_callback_outQueue,
-            dtype="float32",
-            device=(self.settings.serverInputDeviceId, self.settings.serverMonitorDeviceId),
-            blocksize=block_frame,
-            samplerate=self.settings.serverInputAudioSampleRate,
-            channels=(inputMaxChannel, monitorMaxChannel),
-            extra_settings=[inputExtraSetting, monitorExtraSetting]
-        ):
-            with sd.OutputStream(
-                callback=self.audioOutput_callback,
-                dtype="float32",
-                device=self.settings.serverOutputDeviceId,
-                blocksize=block_frame,
-                samplerate=self.settings.serverOutputAudioSampleRate,
-                channels=outputMaxChannel,
-                extra_settings=outputExtraSetting
-            ):
+        with sd.Stream(callback=self.audio_callback_outQueue, dtype="float32", device=(self.settings.serverInputDeviceId, self.settings.serverMonitorDeviceId), blocksize=block_frame, samplerate=self.settings.serverInputAudioSampleRate, channels=(inputMaxChannel, monitorMaxChannel), extra_settings=[inputExtraSetting, monitorExtraSetting]):
+            with sd.OutputStream(callback=self.audioOutput_callback, dtype="float32", device=self.settings.serverOutputDeviceId, blocksize=block_frame, samplerate=self.settings.serverOutputAudioSampleRate, channels=outputMaxChannel, extra_settings=outputExtraSetting):
                 while True:
                     changed = self.checkSettingChanged()
                     if changed:
@@ -293,33 +260,9 @@ class ServerDevice:
                     print(f"                monitor: id:{self.settings.serverMonitorDeviceId}, sr:{self.settings.serverMonitorAudioSampleRate}, ch:{monitorMaxChannel}")
 
     def runWithMonitorAllSeparate(self, block_frame: int, inputMaxChannel: int, outputMaxChannel: int, monitorMaxChannel: int, inputExtraSetting, outputExtraSetting, monitorExtraSetting):
-        with sd.InputStream(
-            callback=self.audioInput_callback_outQueue_monQueue,
-            dtype="float32",
-            device=self.settings.serverInputDeviceId,
-            blocksize=block_frame,
-            samplerate=self.settings.serverInputAudioSampleRate,
-            channels=inputMaxChannel,
-            extra_settings=inputExtraSetting
-        ):
-            with sd.OutputStream(
-                callback=self.audioOutput_callback,
-                dtype="float32",
-                device=self.settings.serverOutputDeviceId,
-                blocksize=block_frame,
-                samplerate=self.settings.serverOutputAudioSampleRate,
-                channels=outputMaxChannel,
-                extra_settings=outputExtraSetting
-            ):
-                with sd.OutputStream(
-                    callback=self.audioMonitor_callback,
-                    dtype="float32",
-                    device=self.settings.serverMonitorDeviceId,
-                    blocksize=block_frame,
-                    samplerate=self.settings.serverMonitorAudioSampleRate,
-                    channels=monitorMaxChannel,
-                    extra_settings=monitorExtraSetting
-                ):
+        with sd.InputStream(callback=self.audioInput_callback_outQueue_monQueue, dtype="float32", device=self.settings.serverInputDeviceId, blocksize=block_frame, samplerate=self.settings.serverInputAudioSampleRate, channels=inputMaxChannel, extra_settings=inputExtraSetting):
+            with sd.OutputStream(callback=self.audioOutput_callback, dtype="float32", device=self.settings.serverOutputDeviceId, blocksize=block_frame, samplerate=self.settings.serverOutputAudioSampleRate, channels=outputMaxChannel, extra_settings=outputExtraSetting):
+                with sd.OutputStream(callback=self.audioMonitor_callback, dtype="float32", device=self.settings.serverMonitorDeviceId, blocksize=block_frame, samplerate=self.settings.serverMonitorAudioSampleRate, channels=monitorMaxChannel, extra_settings=monitorExtraSetting):
                     while True:
                         changed = self.checkSettingChanged()
                         if changed:
@@ -338,6 +281,8 @@ class ServerDevice:
         self.currentModelSamplingRate = -1
         while True:
             if self.settings.serverAudioStated == 0 or self.settings.serverInputDeviceId == -1:
+                sd._terminate()
+                sd._initialize()
                 time.sleep(2)
             else:
                 sd._terminate()
@@ -474,6 +419,7 @@ class ServerDevice:
                 except Exception as e:
                     print("[Voice Changer] processing, ex:", e)
                     import traceback
+
                     traceback.print_exc()
                     time.sleep(2)