fix: use each device's actual channel count in server device mode

This commit is contained in:
wataru 2023-05-13 14:30:15 +09:00
parent ea3c5b5740
commit 6fecb5f908
2 changed files with 74 additions and 52 deletions

View File

@ -1,10 +1,5 @@
import pyaudio import sounddevice as sd
# import json
from dataclasses import dataclass from dataclasses import dataclass
from const import ServerAudioDeviceTypes from const import ServerAudioDeviceTypes
@ -14,44 +9,42 @@ class ServerAudioDevice:
index: int = 0 index: int = 0
name: str = "" name: str = ""
hostAPI: str = "" hostAPI: str = ""
maxInputChannels: int = 0
maxOutputChannels: int = 0
def list_audio_device(): def list_audio_device():
audio = pyaudio.PyAudio() audioDeviceList = sd.query_devices()
audio_input_devices: list[ServerAudioDevice] = []
audio_output_devices: list[ServerAudioDevice] = []
host_apis = []
for api_index in range(audio.get_host_api_count()): inputAudioDeviceList = [d for d in audioDeviceList if d["max_input_channels"] > 0]
host_apis.append(audio.get_host_api_info_by_index(api_index)["name"]) outputDeviceList = [d for d in audioDeviceList if d["max_output_channels"] > 0]
hostapis = sd.query_hostapis()
for x in range(0, audio.get_device_count()): print("input:", inputAudioDeviceList)
device = audio.get_device_info_by_index(x) print("output:", outputDeviceList)
try: print("hostapis", hostapis)
deviceName = device["name"].encode("shift-jis").decode("utf-8")
except (UnicodeDecodeError, UnicodeEncodeError):
deviceName = device["name"]
deviceIndex = device["index"] serverAudioInputDevices = []
hostAPI = host_apis[device["hostApi"]] serverAudioOutputDevices = []
# Wrap every input-capable device record in a ServerAudioDevice.
serverAudioInputDevices = [
    ServerAudioDevice(
        kind=ServerAudioDeviceTypes.audioinput,
        index=dev["index"],
        name=dev["name"],
        hostAPI=hostapis[dev["hostapi"]]["name"],
        maxInputChannels=dev["max_input_channels"],
        maxOutputChannels=dev["max_output_channels"],
    )
    for dev in inputAudioDeviceList
]
# Same wrapping for every output-capable device record.
serverAudioOutputDevices = [
    ServerAudioDevice(
        kind=ServerAudioDeviceTypes.audiooutput,
        index=dev["index"],
        name=dev["name"],
        hostAPI=hostapis[dev["hostapi"]]["name"],
        maxInputChannels=dev["max_input_channels"],
        maxOutputChannels=dev["max_output_channels"],
    )
    for dev in outputDeviceList
]
if device["maxInputChannels"] > 0: return serverAudioInputDevices, serverAudioOutputDevices
audio_input_devices.append(
ServerAudioDevice(
kind=ServerAudioDeviceTypes.audioinput,
index=deviceIndex,
name=deviceName,
hostAPI=hostAPI,
)
)
if device["maxOutputChannels"] > 0:
audio_output_devices.append(
ServerAudioDevice(
kind=ServerAudioDeviceTypes.audiooutput,
index=deviceIndex,
name=deviceName,
hostAPI=hostAPI,
)
)
return audio_input_devices, audio_output_devices

View File

@ -102,17 +102,29 @@ class VoiceChanger:
def audio_callback( def audio_callback(
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
): ):
# print(indata)
try: try:
with Timer("all_inference_time") as t: with Timer("all_inference_time") as t:
unpackedData = librosa.to_mono(indata.T) * 32768.0 unpackedData = librosa.to_mono(indata.T) * 32768.0
out_wav, times = self.on_request(unpackedData) out_wav, times = self.on_request(unpackedData)
outdata[:] = np.repeat(out_wav, 2).reshape(-1, 2) / 32768.0 outputChunnels = outdata.shape[1]
outdata[:] = (
np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
/ 32768.0
)
all_inference_time = t.secs all_inference_time = t.secs
performance = [all_inference_time] + times performance = [all_inference_time] + times
self.settings.performance = [round(x * 1000) for x in performance] self.settings.performance = [round(x * 1000) for x in performance]
except Exception as e: except Exception as e:
print(e) print("[Voice Changer] ex:", e)
def getServerAudioDevice(
    self, audioDeviceList: list[ServerAudioDevice], index: int
):
    """Return the first device in ``audioDeviceList`` whose ``index`` matches.

    Args:
        audioDeviceList: Devices to search; each entry exposes an ``index``
            attribute.
        index: Device index to look for.

    Returns:
        The first matching entry, or ``None`` when no entry matches
        (including when the list is empty).
    """
    # next() stops at the first hit instead of filtering the whole list.
    return next((x for x in audioDeviceList if x.index == index), None)
def serverLocal(self, _vc): def serverLocal(self, _vc):
vc: VoiceChanger = _vc vc: VoiceChanger = _vc
@ -131,12 +143,28 @@ class VoiceChanger:
else: else:
sd._terminate() sd._terminate()
sd._initialize() sd._initialize()
if currentInputDeviceId != vc.settings.serverInputDeviceId:
sd.default.device[0] = vc.settings.serverInputDeviceId sd.default.device[0] = vc.settings.serverInputDeviceId
currentInputDeviceId = vc.settings.serverInputDeviceId currentInputDeviceId = vc.settings.serverInputDeviceId
if currentOutputDeviceId != vc.settings.serverOutputDeviceId: sd.default.device[1] = vc.settings.serverOutputDeviceId
sd.default.device[1] = vc.settings.serverOutputDeviceId currentOutputDeviceId = vc.settings.serverOutputDeviceId
currentOutputDeviceId = vc.settings.serverOutputDeviceId
currentInputChannelNum = vc.settings.serverAudioInputDevices
serverInputAudioDevice = self.getServerAudioDevice(
vc.settings.serverAudioInputDevices, currentInputDeviceId
)
serverOutputAudioDevice = self.getServerAudioDevice(
vc.settings.serverAudioOutputDevices, currentOutputDeviceId
)
print(serverInputAudioDevice, serverOutputAudioDevice)
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
time.sleep(2)
print("serverInputAudioDevice or serverOutputAudioDevice is None")
continue
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
currentOutputChannelNum = serverInputAudioDevice.maxOutputChannels
vc.settings.serverInputAudioSampleRate = ( vc.settings.serverInputAudioSampleRate = (
self.voiceChanger.get_processing_sampling_rate() self.voiceChanger.get_processing_sampling_rate()
@ -144,14 +172,16 @@ class VoiceChanger:
currentInputSampleRate = vc.settings.serverInputAudioSampleRate currentInputSampleRate = vc.settings.serverInputAudioSampleRate
currentInputChunkNum = vc.settings.serverReadChunkSize currentInputChunkNum = vc.settings.serverReadChunkSize
block_frame = currentInputChunkNum * 128 block_frame = currentInputChunkNum * 128
try: try:
with sd.Stream( with sd.Stream(
callback=self.audio_callback, callback=self.audio_callback,
blocksize=block_frame, blocksize=block_frame,
samplerate=currentInputSampleRate, samplerate=currentInputSampleRate,
dtype="float32", dtype="float32",
channels=1, channels=[currentInputChannelNum, currentOutputChannelNum],
): ):
print()
while ( while (
vc.settings.serverAudioStated == 1 vc.settings.serverAudioStated == 1
and currentInputDeviceId == vc.settings.serverInputDeviceId and currentInputDeviceId == vc.settings.serverInputDeviceId
@ -182,8 +212,7 @@ class VoiceChanger:
) )
except Exception as e: except Exception as e:
print(e) print("[Voice Changer] ex:", e)
print()
time.sleep(2) time.sleep(2)
def __init__(self, params: VoiceChangerParams): def __init__(self, params: VoiceChangerParams):