WIP: local audio

This commit is contained in:
wataru 2023-02-18 09:30:50 +09:00
parent b97dc18654
commit fc5c4aa2fb
3 changed files with 185 additions and 122 deletions

View File

@ -1,122 +0,0 @@
import argparse
import pyaudio
import wave
import struct
import socketio
import ssl
from datetime import datetime
import time
# TLS 1.2 client context with certificate verification switched off.
# NOTE(review): `context` is not referenced anywhere else in the visible part of
# this script (the Socket.IO client below is created with ssl_verify=False
# instead) — confirm whether it is still needed.
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
context.verify_mode = ssl.CERT_NONE
def setupArgParser():
    """Build the command-line parser for the test client.

    Options:
        -p  port (int, default 18888)
        -d  device index (int, no default, so ``None`` when omitted)
        -s  sid (str, default "")

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    option_specs = (
        ("-p", {"type": int, "default": 18888, "help": "port"}),
        ("-d", {"type": int, "help": "device index"}),
        ("-s", {"type": str, "default": "", "help": "sid"}),
    )
    arg_parser = argparse.ArgumentParser()
    for flag, settings in option_specs:
        arg_parser.add_argument(flag, **settings)
    return arg_parser
class MockStream:
    """Mock that swaps audio-stream input/output for WAV-file input/output.

    It offers the ``read`` / ``write`` / ``stop_stream`` / ``close`` calls so
    that file I/O can be dropped in where a streaming audio object is used.
    Output files are written as mono, 2 bytes per sample.
    """

    def __init__(self, sampling_rate):
        """Create a mock stream with no files opened yet.

        Args:
            sampling_rate: Frame rate stored in the header of the output WAV.
        """
        self.sampling_rate = sampling_rate
        self.start_count = 2  # zero-filled chunks still to return before file data
        self.end_count = 2  # zero-filled chunks still allowed after file data ends
        self.finished = False  # True once ``end_count`` has dropped below zero
        self.fr = None  # wave reader, set by open_inputfile()
        self.fw = None  # wave writer, set by open_outputfile()

    def open_inputfile(self, input_filename):
        """Open ``input_filename`` as the WAV file served by ``read``."""
        self.fr = wave.open(input_filename, 'rb')

    def open_outputfile(self, output_filename):
        """Open ``output_filename`` for writing (1 channel, 2-byte samples)."""
        self.fw = wave.open(output_filename, 'wb')
        self.fw.setnchannels(1)
        self.fw.setsampwidth(2)
        self.fw.setframerate(self.sampling_rate)

    def read(self, length, exception_on_overflow=False):
        """Return the next chunk of up to ``length`` frames as bytes.

        The first two calls return ``length * 2`` zero bytes (dummy empty
        data). Later calls return frames from the input file. Once the file
        is exhausted, zero-filled chunks are returned again, and after two of
        them the stream is marked as finished.

        Args:
            length: Number of frames requested.
            exception_on_overflow: Accepted for call compatibility; ignored.

        Returns:
            Raw frame bytes.
        """
        if self.start_count > 0:
            wav = bytes(length * 2)
            self.start_count -= 1  # the first two reads send dummy empty data
        else:
            wav = self.fr.readframes(length)
        if len(wav) <= 0:
            # No data left: keep sending dummy empty data for the last reads.
            wav = bytes(length * 2)
            self.end_count -= 1
            if self.end_count < 0:
                self._mark_finished()
        return wav

    def _mark_finished(self):
        """Record end-of-stream without relying on an undefined global.

        The original code assigned ``Hyperparameters.VC_END_FLAG`` although no
        such name is defined or imported here, which raised ``NameError``.
        The flag now lives on the instance; a module-level ``Hyperparameters``
        object is still updated when one happens to exist.
        """
        self.finished = True
        hyperparameters = globals().get("Hyperparameters")
        if hyperparameters is not None:
            hyperparameters.VC_END_FLAG = True

    def write(self, wav):
        """Append raw frame bytes ``wav`` to the output file."""
        self.fw.writeframes(wav)

    def stop_stream(self):
        """No-op, present so the mock can be stopped like a real stream."""
        pass

    def close(self):
        """Close whichever files are open and reset their handles to ``None``."""
        if self.fr is not None:
            self.fr.close()
            self.fr = None
        if self.fw is not None:
            self.fw.close()
            self.fw = None
# Module-level WAV sink: MyCustomNamespace.on_response writes each received
# payload to it. Opening the output file here means "test.wav" is created as
# soon as this module is executed.
mock_stream_out = MockStream(24000)
mock_stream_out.open_outputfile("test.wav")
class MyCustomNamespace(socketio.ClientNamespace):
    """Socket.IO client namespace that logs events and stores received audio.

    Received payloads are written to the module-level ``mock_stream_out``.
    """

    def on_connect(self):
        """Log the connection with a timestamp."""
        print('[{}] connect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

    def on_disconnect(self):
        """Log the disconnection with a timestamp."""
        print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

    def on_response(self, msg):
        """Handle a ``response`` event.

        Args:
            msg: Sequence whose element 0 is a timestamp (not used here) and
                whose element 1 is the raw payload to store.
        """
        print('[{}] response : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), msg))
        data = msg[1]
        # The payload is written as-is. The former struct.unpack() call was
        # dropped: its result was never used, and it raised struct.error for
        # an odd-length payload before anything could be written.
        mock_stream_out.write(data)
def my_background_task(sio):
    """Background loop: repeatedly emit a fixed message on the "/test" namespace.

    Each iteration emits the event and then calls ``sio.sleep(1)``.
    The loop has no exit condition, so this function never returns.

    Args:
        sio: Socket.IO client; only its ``emit`` and ``sleep`` methods are used.
    """
    event_name = 'broadcast_message'
    payload = "aaa"
    pause = 1
    while True:
        sio.emit(event_name, payload, namespace="/test")
        sio.sleep(pause)
if __name__ == '__main__':
    # Script entry point: capture audio from an input device and stream each
    # chunk to the server as a 'request_message' event on the "/test" namespace.
    parser = setupArgParser()
    args, unknown = parser.parse_known_args()
    port = args.p
    deviceIndex = args.d
    sid = args.s  # parsed but not used by this script

    audio = pyaudio.PyAudio()
    audio_input_stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=24000,
        frames_per_buffer=4096,
        input_device_index=deviceIndex,
        input=True)

    sio = socketio.Client(ssl_verify=False)
    sio.register_namespace(MyCustomNamespace("/test"))
    # The host is still fixed; the port now follows -p (default 18888, which
    # matches the previously hard-coded URL).
    sio.connect("https://192.168.0.3:{}".format(port))

    try:
        while True:
            in_wav = audio_input_stream.read(4096, exception_on_overflow=False)
            # read() already returns the paInt16 samples as raw bytes, so send
            # them unchanged. Re-packing with struct.pack('<Nh', *in_wav)
            # treated every single byte as a 16-bit sample, which doubled the
            # payload size and corrupted the audio.
            sio.emit('request_message', [1000, in_wav], namespace="/test")
    except KeyboardInterrupt:
        pass  # Ctrl-C is the normal way to stop the client
    finally:
        # Disconnect first so no further responses arrive while closing.
        sio.disconnect()
        audio_input_stream.stop_stream()
        audio_input_stream.close()
        audio.terminate()
        # Closing the WAV writer lets the wave module finalize the file.
        mock_stream_out.close()

View File

@ -0,0 +1,32 @@
import pyaudio
if __name__ == '__main__':
    # Script entry point: list the audio devices reported by PyAudio, grouped
    # into input and output devices, and print the result as one JSON document.
    import json  # local import: stdlib replacement for the never-imported jsonable_encoder

    audio = pyaudio.PyAudio()
    audio_input_devices = []
    audio_output_devices = []
    try:
        # Host API names, indexed the same way as each device's 'hostApi' field.
        host_apis = [
            audio.get_host_api_info_by_index(api_index)['name']
            for api_index in range(audio.get_host_api_count())
        ]

        for x in range(audio.get_device_count()):
            device = audio.get_device_info_by_index(x)
            try:
                # Re-interpret the name's Shift-JIS bytes as UTF-8; presumably
                # this repairs names that were decoded with the wrong codec
                # (TODO confirm on the affected platform). Fall back to the
                # name as reported when the round trip is not possible.
                deviceName = device['name'].encode('shift-jis').decode('utf-8')
            except (UnicodeDecodeError, UnicodeEncodeError):
                deviceName = device['name']

            deviceIndex = device['index']
            hostAPI = host_apis[device['hostApi']]

            # A device with both input and output channels is listed twice.
            if device['maxInputChannels'] > 0:
                audio_input_devices.append({"kind": "audioinput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
            if device['maxOutputChannels'] > 0:
                audio_output_devices.append({"kind": "audiooutput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
    finally:
        # Release PyAudio even if enumeration fails part-way.
        audio.terminate()

    audio_devices = {
        "audio_input_devices": audio_input_devices,
        "audio_output_devices": audio_output_devices,
    }
    # The original called jsonable_encoder(), which is not imported anywhere in
    # this script and therefore raised NameError. The structure only contains
    # dicts, lists, strings and device indices, so json.dumps is used instead.
    print(json.dumps(audio_devices, ensure_ascii=False))

153
client/python/vc_client.py Normal file
View File

@ -0,0 +1,153 @@
import argparse
import pyaudio
import wave
import struct
import socketio
import ssl
from datetime import datetime
import time
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import signal
import sys
import numpy as np
# Chunk size in frames: passed as frames_per_buffer when the audio streams are
# opened, and used as the number of frames read from the input stream for each
# 'request_message' sent to the server.
BUFFER_SIZE = 2048
def setupArgParser():
    """Build the command-line parser for the voice-changer client.

    Options:
        --url     server URL (default "http://localhost:18888")
        --input   input device index (int, required)
        --output  output device index (int, default -1; the script opens an
                  output stream only when the value is >= 0)
        --to      sid (str, default "")

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", type=str, default="http://localhost:18888", help="url")
    parser.add_argument("--input", type=int, required=True, help="input device index")
    # The help text used to say "input device index" (copy-paste slip).
    parser.add_argument("--output", type=int, default=-1, help="output device index (negative disables playback)")
    parser.add_argument("--to", type=str, default="", help="sid")
    return parser
class MockStream:
    """File-backed stand-in for an audio stream.

    It offers the ``read`` / ``write`` / ``stop_stream`` / ``close`` calls so
    that WAV-file I/O can be used where a streaming audio object is expected.
    Output files are written as mono, 2 bytes per sample.
    """

    def __init__(self, sampling_rate):
        """Create a mock stream with no files opened yet.

        Args:
            sampling_rate: Frame rate stored in the header of the output WAV.
        """
        self.sampling_rate = sampling_rate
        self.start_count = 2  # zero-filled chunks still to return before file data
        self.end_count = 2  # zero-filled chunks still allowed after file data ends
        self.finished = False  # True once ``end_count`` has dropped below zero
        self.fr = None  # wave reader, set by open_inputfile()
        self.fw = None  # wave writer, set by open_outputfile()

    def open_inputfile(self, input_filename):
        """Open ``input_filename`` as the WAV file served by ``read``."""
        self.fr = wave.open(input_filename, 'rb')

    def open_outputfile(self, output_filename):
        """Open ``output_filename`` for writing (1 channel, 2-byte samples)."""
        self.fw = wave.open(output_filename, 'wb')
        self.fw.setnchannels(1)
        self.fw.setsampwidth(2)
        self.fw.setframerate(self.sampling_rate)

    def read(self, length, exception_on_overflow=False):
        """Return the next chunk of up to ``length`` frames as bytes.

        The first two calls return ``length * 2`` zero bytes. Later calls
        return frames from the input file. Once the file is exhausted,
        zero-filled chunks are returned again, and after two of them the
        stream is marked as finished.

        Args:
            length: Number of frames requested.
            exception_on_overflow: Accepted for call compatibility; ignored.

        Returns:
            Raw frame bytes.
        """
        if self.start_count > 0:
            wav = bytes(length * 2)
            self.start_count -= 1
        else:
            wav = self.fr.readframes(length)
        if len(wav) <= 0:
            # Input exhausted: pad with zero-filled chunks.
            wav = bytes(length * 2)
            self.end_count -= 1
            if self.end_count < 0:
                self._mark_finished()
        return wav

    def _mark_finished(self):
        """Record end-of-stream without relying on an undefined global.

        The original code assigned ``Hyperparameters.VC_END_FLAG`` although no
        such name is defined or imported here, which raised ``NameError``.
        The flag now lives on the instance; a module-level ``Hyperparameters``
        object is still updated when one happens to exist.
        """
        self.finished = True
        hyperparameters = globals().get("Hyperparameters")
        if hyperparameters is not None:
            hyperparameters.VC_END_FLAG = True

    def write(self, wav):
        """Append raw frame bytes ``wav`` to the output file."""
        self.fw.writeframes(wav)

    def stop_stream(self):
        """No-op, present so the mock can be stopped like a real stream."""
        pass

    def close(self):
        """Close whichever files are open and reset their handles to ``None``."""
        if self.fr is not None:
            self.fr.close()
            self.fr = None
        if self.fw is not None:
            self.fw.close()
            self.fw = None
class MyCustomNamespace(socketio.ClientNamespace):
    """Socket.IO client namespace that forwards received audio to output sinks."""

    def __init__(self, namespace: str, audio_output_stream, file_output_stream):
        """Store the sinks that ``on_response`` writes to.

        Args:
            namespace: Socket.IO namespace handled by this instance.
            audio_output_stream: Object with a ``write(data)`` method, or
                ``None`` to skip it.
            file_output_stream: Object with a ``write(data)`` method, or
                ``None`` to skip it.
        """
        super().__init__(namespace)
        self.audio_output_stream = audio_output_stream
        self.file_output_stream = file_output_stream

    def on_connect(self):
        """Report that the connection was established."""
        print('connected')

    def on_disconnect(self):
        """Report that the connection was closed."""
        print('disconnected')

    def on_response(self, msg):
        """Handle a ``response`` event: print the round-trip time, forward the data.

        Args:
            msg: Sequence whose element 0 is a timestamp in milliseconds
                (compared against ``time.time() * 1000``) and whose element 1
                is the raw payload to pass to the sinks.
        """
        timestamp = msg[0]
        responseTime = time.time() * 1000 - timestamp
        data = msg[1]
        print(f"RT:{responseTime}msec")

        # The payload is forwarded as-is. The former struct.unpack() call was
        # dropped: its result was never used, and it raised struct.error for
        # an odd-length payload before anything could be written.
        if self.file_output_stream is not None:
            self.file_output_stream.write(data)
        if self.audio_output_stream is not None:
            self.audio_output_stream.write(data)
if __name__ == '__main__':
    # Script entry point: capture audio from the input device, send each chunk
    # to the server, and let MyCustomNamespace pass the responses to the
    # optional output device.
    parser = setupArgParser()
    args, unknown = parser.parse_known_args()
    url = args.url
    inputDevice = args.input
    outputDevice = args.output
    to = args.to  # parsed but not used by this script

    audio = pyaudio.PyAudio()
    audio_input_stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=24000,
        frames_per_buffer=BUFFER_SIZE,
        input_device_index=inputDevice,
        input=True)

    # A negative --output (the default) means no playback stream is opened.
    if outputDevice >= 0:
        audio_output_stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=24000,
            frames_per_buffer=BUFFER_SIZE,
            # Was input_device_index=outputDevice, which an output-only stream
            # does not use, so the requested output device was never selected.
            output_device_index=outputDevice,
            output=True)
    else:
        audio_output_stream = None

    # NOTE: to dump received audio to a file, set this to a MockStream(24000)
    # with open_outputfile("test.wav") called on it.
    mock_stream_out = None

    my_namespace = MyCustomNamespace("/test", audio_output_stream, mock_stream_out)

    sio = socketio.Client(ssl_verify=False)
    sio.register_namespace(my_namespace)
    sio.connect(url)

    try:
        while True:
            in_wav = audio_input_stream.read(BUFFER_SIZE, exception_on_overflow=False)
            # The send time in milliseconds travels with the chunk; on_response
            # subtracts it from the current time to print the round-trip time.
            sio.emit('request_message', [time.time() * 1000, in_wav], namespace="/test")
    except KeyboardInterrupt:
        pass  # Ctrl-C is the normal way to stop the client
    finally:
        # Disconnect first so no response handler writes to a closed stream.
        sio.disconnect()
        audio_input_stream.stop_stream()
        audio_input_stream.close()
        # Both optional sinks may be None; the previous handler called their
        # methods unconditionally and crashed with AttributeError on Ctrl-C.
        if audio_output_stream is not None:
            audio_output_stream.stop_stream()
            audio_output_stream.close()
        audio.terminate()
        if mock_stream_out is not None:
            mock_stream_out.close()