WIP: local audio

2025-03-13 19:34:02 +03:00 · 2023-02-18 09:30:50 +09:00 · 2023-02-18 09:30:50 +09:00 · fc5c4aa2fb
commit fc5c4aa2fb
parent b97dc18654
3 changed files with 185 additions and 122 deletions
--- a/client/native/cli_client.py
+++ b/client/native/cli_client.py
@ -1,122 +0,0 @@
-import argparse
-import pyaudio
-import wave
-import struct
-import socketio
-import ssl
-from datetime import datetime
-import time
-
-context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
-context.verify_mode = ssl.CERT_NONE
-
-
-def setupArgParser():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-p", type=int, default=18888, help="port")
-    parser.add_argument("-d", type=int, help="device index")
-    parser.add_argument("-s", type=str, default="", help="sid")
-
-    return parser
-
-
-class MockStream:
-    """
-    オーディオストリーミング入出力をファイル入出力にそのまま置き換えるためのモック
-    """
-
-    def __init__(self, sampling_rate):
-        self.sampling_rate = sampling_rate
-        self.start_count = 2
-        self.end_count = 2
-        self.fr = None
-        self.fw = None
-
-    def open_inputfile(self, input_filename):
-        self.fr = wave.open(input_filename, 'rb')
-
-    def open_outputfile(self, output_filename):
-        self.fw = wave.open(output_filename, 'wb')
-        self.fw.setnchannels(1)
-        self.fw.setsampwidth(2)
-        self.fw.setframerate(self.sampling_rate)
-
-    def read(self, length, exception_on_overflow=False):
-        if self.start_count > 0:
-            wav = bytes(length * 2)
-            self.start_count -= 1  # 最初の2回はダミーの空データ送る
-        else:
-            wav = self.fr.readframes(length)
-        if len(wav) <= 0:  # データなくなってから最後の2回はダミーの空データを送る
-            wav = bytes(length * 2)
-            self.end_count -= 1
-            if self.end_count < 0:
-                Hyperparameters.VC_END_FLAG = True
-        return wav
-
-    def write(self, wav):
-        self.fw.writeframes(wav)
-
-    def stop_stream(self):
-        pass
-
-    def close(self):
-        if self.fr != None:
-            self.fr.close()
-            self.fr = None
-        if self.fw != None:
-            self.fw.close()
-            self.fw = None
-
-
-mock_stream_out = MockStream(24000)
-mock_stream_out.open_outputfile("test.wav")
-
-
-class MyCustomNamespace(socketio.ClientNamespace):  # 名前空間を設定するクラス
-    def on_connect(self):
-        print('[{}] connect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
-
-    def on_disconnect(self):
-        print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
-
-    def on_response(self, msg):
-        print('[{}] response : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), msg))
-        timestamp = msg[0]
-        data = msg[1]
-        unpackedData = struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data)
-        mock_stream_out.write(data)
-
-
-def my_background_task(sio):  # ここにバックグランド処理のコードを書く
-    while True:
-
-        sio.emit('broadcast_message', "aaa", namespace="/test")  # ターミナルで入力された文字をサーバーに送信
-        sio.sleep(1)
-
-
-if __name__ == '__main__':
-    parser = setupArgParser()
-    args, unknown = parser.parse_known_args()
-
-    port = args.p
-    deviceIndex = args.d
-    sid = args.s
-
-    audio = pyaudio.PyAudio()
-    audio_input_stream = audio.open(
-        format=pyaudio.paInt16,
-        channels=1,
-        rate=24000,
-        frames_per_buffer=4096,
-        input_device_index=args.d,
-        input=True)
-
-    sio = socketio.Client(ssl_verify=False)
-    sio.register_namespace(MyCustomNamespace("/test"))
-    sio.connect("https://192.168.0.3:18888")
-    while True:
-        in_wav = audio_input_stream.read(4096, exception_on_overflow=False)
-        bin = struct.pack('<%sh' % len(in_wav), *in_wav)
-        sio.emit('request_message', [1000, bin], namespace="/test")
-        # sio.start_background_task(my_background_task, sio)
--- a/client/python/audio_device_list.py
+++ b/client/python/audio_device_list.py
@ -0,0 +1,32 @@
+import pyaudio
+
+if __name__ == '__main__':
+    # List every audio device PyAudio reports, split into inputs and outputs,
+    # and print the result as a single dict of plain (JSON-compatible) values.
+    audio = pyaudio.PyAudio()
+    try:
+        # Host API names in index order, so device['hostApi'] can index this list.
+        host_apis = [
+            audio.get_host_api_info_by_index(api_index)['name']
+            for api_index in range(audio.get_host_api_count())
+        ]
+
+        audio_input_devices = []
+        audio_output_devices = []
+        for x in range(audio.get_device_count()):
+            device = audio.get_device_info_by_index(x)
+            try:
+                # Presumably a workaround for names that were mis-decoded as
+                # Shift-JIS; fall back to the raw name when the round-trip fails.
+                deviceName = device['name'].encode('shift-jis').decode('utf-8')
+            except (UnicodeDecodeError, UnicodeEncodeError):
+                deviceName = device['name']
+
+            deviceIndex = device['index']
+            hostAPI = host_apis[device['hostApi']]
+
+            # A device that has both input and output channels is listed twice.
+            if device['maxInputChannels'] > 0:
+                audio_input_devices.append({"kind": "audioinput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
+            if device['maxOutputChannels'] > 0:
+                audio_output_devices.append({"kind": "audiooutput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
+    finally:
+        # Release PortAudio even if enumeration fails part-way.
+        audio.terminate()
+
+    audio_devices = {
+        "audio_input_devices": audio_input_devices,
+        "audio_output_devices": audio_output_devices,
+    }
+
+    # The dict holds only the values built above and the indices PyAudio
+    # reported, so no encoder pass is needed. The previous call to
+    # `jsonable_encoder` referenced a name that was never imported and
+    # raised NameError before anything was printed.
+    print(audio_devices)
--- a/client/python/vc_client.py
+++ b/client/python/vc_client.py
@ -0,0 +1,153 @@
+import argparse
+import pyaudio
+import wave
+import struct
+import socketio
+import ssl
+from datetime import datetime
+import time
+
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+import signal
+import sys
+import numpy as np
+
+BUFFER_SIZE = 2048
+
+
+def setupArgParser():
+    """Build the command-line parser for the voice-changer client.
+
+    Options:
+        --url     server URL (default ``http://localhost:18888``)
+        --input   input device index (required)
+        --output  output device index; defaults to -1
+        --to      sid string (default empty)
+
+    Returns:
+        The configured ``argparse.ArgumentParser``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--url", type=str, default="http://localhost:18888", help="url")
+    parser.add_argument("--input", type=int, required=True, help="input device index")
+    # Help text previously said "input device index" (copy-paste error).
+    parser.add_argument("--output", type=int, default=-1, help="output device index")
+    parser.add_argument("--to", type=str, default="", help="sid")
+
+    return parser
+
+
+class MockStream:
+    """Mock that swaps audio-stream input/output for WAV-file input/output.
+
+    It offers the ``read`` / ``write`` / ``stop_stream`` / ``close`` calls the
+    client makes on its audio streams, so a file can stand in for a device.
+    """
+
+    def __init__(self, sampling_rate):
+        self.sampling_rate = sampling_rate
+        self.start_count = 2   # dummy silent buffers still to send before real data
+        self.end_count = 2     # dummy silent buffers still to send after the data runs out
+        self.finished = False  # set once the trailing silent buffers are used up
+        self.fr = None         # wave reader, set by open_inputfile()
+        self.fw = None         # wave writer, set by open_outputfile()
+
+    def open_inputfile(self, input_filename):
+        """Open ``input_filename`` as the WAV source for ``read``."""
+        self.fr = wave.open(input_filename, 'rb')
+
+    def open_outputfile(self, output_filename):
+        """Open ``output_filename`` as a 1-channel, 2-byte-sample WAV sink for ``write``."""
+        self.fw = wave.open(output_filename, 'wb')
+        self.fw.setnchannels(1)
+        self.fw.setsampwidth(2)
+        self.fw.setframerate(self.sampling_rate)
+
+    def read(self, length, exception_on_overflow=False):
+        """Return the next ``length`` frames as bytes.
+
+        The first two calls return zero-filled dummy data. Once the file is
+        exhausted, zero-filled data is returned again, and ``finished``
+        becomes True after the trailing dummy buffers have been sent.
+        ``exception_on_overflow`` is accepted and ignored.
+        """
+        if self.start_count > 0:
+            wav = bytes(length * 2)
+            self.start_count -= 1  # the first two reads send empty dummy data
+        else:
+            wav = self.fr.readframes(length)
+        if len(wav) <= 0:  # after the data runs out, send empty dummy data
+            wav = bytes(length * 2)
+            self.end_count -= 1
+            if self.end_count < 0:
+                # This used to assign Hyperparameters.VC_END_FLAG, but no
+                # `Hyperparameters` is defined or imported in this file, so
+                # reaching the end of input raised NameError. The end state
+                # is now kept on the instance.
+                self.finished = True
+        return wav
+
+    def write(self, wav):
+        """Append raw frame bytes to the output WAV file."""
+        self.fw.writeframes(wav)
+
+    def stop_stream(self):
+        """No-op, present so the mock matches the calls made on a real stream."""
+        pass
+
+    def close(self):
+        """Close whichever files are open; safe to call more than once."""
+        if self.fr is not None:
+            self.fr.close()
+            self.fr = None
+        if self.fw is not None:
+            self.fw.close()
+            self.fw = None
+
+
+class MyCustomNamespace(socketio.ClientNamespace):
+    """Socket.IO client namespace that plays and/or records server responses.
+
+    Args:
+        namespace: namespace path passed to ``socketio.ClientNamespace``.
+        audio_output_stream: object with ``write(bytes)``, or None to skip it.
+        file_output_stream: object with ``write(bytes)``, or None to skip it.
+    """
+
+    def __init__(self, namespace: str, audio_output_stream, file_output_stream):
+        super().__init__(namespace)
+        self.audio_output_stream = audio_output_stream
+        self.file_output_stream = file_output_stream
+
+    def on_connect(self):
+        print('connected')
+
+    def on_disconnect(self):
+        print('disconnected')
+
+    def on_response(self, msg):
+        """Handle a ``response`` event carrying ``[timestamp, data]``.
+
+        ``timestamp`` is subtracted from the current time in milliseconds to
+        report the round trip. ``data`` is written unchanged to each
+        configured output.
+        """
+        timestamp = msg[0]
+        responseTime = time.time() * 1000 - timestamp
+        data = msg[1]
+        print(f"RT:{responseTime}msec")
+        # The payload is forwarded as-is. The earlier struct.unpack() result
+        # was never used and raised struct.error on an odd-length payload
+        # before anything was written.
+
+        if self.file_output_stream is not None:
+            self.file_output_stream.write(data)
+        if self.audio_output_stream is not None:
+            self.audio_output_stream.write(data)
+
+
+if __name__ == '__main__':
+    # Capture audio from the input device, stream it to the server, and let
+    # MyCustomNamespace play or record whatever the server sends back.
+    parser = setupArgParser()
+    args, unknown = parser.parse_known_args()
+
+    url = args.url
+    inputDevice = args.input
+    outputDevice = args.output
+    to = args.to  # parsed but not used anywhere in this script yet
+
+    audio = pyaudio.PyAudio()
+    audio_input_stream = audio.open(
+        format=pyaudio.paInt16,
+        channels=1,
+        rate=24000,
+        frames_per_buffer=BUFFER_SIZE,
+        input_device_index=inputDevice,
+        input=True)
+
+    if outputDevice >= 0:
+        # An output stream is selected with output_device_index. This call
+        # previously passed input_device_index, so the requested output
+        # device was never applied.
+        audio_output_stream = audio.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=24000,
+            frames_per_buffer=BUFFER_SIZE,
+            output_device_index=outputDevice,
+            output=True)
+    else:
+        # A negative index (the default) means no local playback.
+        audio_output_stream = None
+
+    # Recording the responses to a file is disabled; to enable it, use:
+    # mock_stream_out = MockStream(24000)
+    # mock_stream_out.open_outputfile("test.wav")
+    mock_stream_out = None
+
+    # mock_stream_in = MockStream(24000)
+    # mock_stream_in.open_outputfile("test_in.wav")
+
+    my_namespace = MyCustomNamespace("/test", audio_output_stream, mock_stream_out)
+
+    sio = socketio.Client(ssl_verify=False)
+    sio.register_namespace(my_namespace)
+    sio.connect(url)
+    try:
+        while True:
+            in_wav = audio_input_stream.read(BUFFER_SIZE, exception_on_overflow=False)
+            # The send time in milliseconds travels with the audio so the
+            # response handler can report the round trip.
+            sio.emit('request_message', [time.time() * 1000, in_wav], namespace="/test")
+    except KeyboardInterrupt:
+        # Ctrl-C is the normal way to stop the client.
+        pass
+    finally:
+        # Clean up on every exit path. The optional streams may be None, and
+        # the old handler dereferenced them unconditionally, so it crashed.
+        sio.disconnect()
+
+        audio_input_stream.stop_stream()
+        audio_input_stream.close()
+        if audio_output_stream is not None:
+            audio_output_stream.stop_stream()
+            audio_output_stream.close()
+        audio.terminate()
+
+        if mock_stream_out is not None:
+            mock_stream_out.close()