fix: server device mode channel num

This commit is contained in:
wataru 2023-05-13 14:30:15 +09:00
parent ea3c5b5740
commit 6fecb5f908
2 changed files with 74 additions and 52 deletions

View File

@ -1,10 +1,5 @@
import pyaudio
# import json
import sounddevice as sd
from dataclasses import dataclass
from const import ServerAudioDeviceTypes
@ -14,44 +9,42 @@ class ServerAudioDevice:
index: int = 0
name: str = ""
hostAPI: str = ""
maxInputChannels: int = 0
maxOutputChannels: int = 0
def list_audio_device():
    """Enumerate the server's audio devices through sounddevice.

    Every device reported by ``sd.query_devices()`` is classified by its
    channel counts: a device with at least one input channel is listed as an
    input, one with at least one output channel as an output. A duplex device
    therefore appears in both lists.

    Returns:
        A ``(inputs, outputs)`` tuple of ``ServerAudioDevice`` lists. Each
        entry carries the device index, name, host API name and both channel
        counts, so callers can open a stream with a channel count the device
        actually supports.
    """
    audioDeviceList = sd.query_devices()

    inputAudioDeviceList = [d for d in audioDeviceList if d["max_input_channels"] > 0]
    outputDeviceList = [d for d in audioDeviceList if d["max_output_channels"] > 0]
    hostapis = sd.query_hostapis()

    print("input:", inputAudioDeviceList)
    print("output:", outputDeviceList)
    print("hostapis", hostapis)

    def to_server_device(d, kind):
        # Translate one sounddevice device dict into the project's record.
        return ServerAudioDevice(
            kind=kind,
            index=d["index"],
            name=d["name"],
            # d["hostapi"] is an index into the host API list.
            hostAPI=hostapis[d["hostapi"]]["name"],
            maxInputChannels=d["max_input_channels"],
            maxOutputChannels=d["max_output_channels"],
        )

    serverAudioInputDevices = [
        to_server_device(d, ServerAudioDeviceTypes.audioinput)
        for d in inputAudioDeviceList
    ]
    serverAudioOutputDevices = [
        to_server_device(d, ServerAudioDeviceTypes.audiooutput)
        for d in outputDeviceList
    ]

    return serverAudioInputDevices, serverAudioOutputDevices

View File

@ -102,17 +102,29 @@ class VoiceChanger:
def audio_callback(
    self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
):
    """Stream callback: convert one input block and write it to ``outdata``.

    Args:
        indata: Captured block; transposed and down-mixed to mono before
            conversion.
        outdata: Output buffer, filled in place. Its second dimension is
            taken as the number of output channels.
        frames: Unused here.
        times: Unused here.
        status: Unused here.

    Any exception is caught and printed rather than raised.
    """
    try:
        with Timer("all_inference_time") as t:
            # Scale by 32768 before handing the mono signal to on_request.
            unpackedData = librosa.to_mono(indata.T) * 32768.0
            out_wav, inference_times = self.on_request(unpackedData)

            # Duplicate the mono result across however many channels the
            # output buffer has instead of assuming stereo.
            output_channels = outdata.shape[1]
            outdata[:] = (
                np.repeat(out_wav, output_channels).reshape(-1, output_channels)
                / 32768.0
            )
        all_inference_time = t.secs
        performance = [all_inference_time] + inference_times
        # Multiply by 1000 and round before storing.
        self.settings.performance = [round(x * 1000) for x in performance]
    except Exception as e:
        print("[Voice Changer] ex:", e)
def getServerAudioDevice(
    self, audioDeviceList: "list[ServerAudioDevice]", index: int
) -> "ServerAudioDevice | None":
    """Return the first device in ``audioDeviceList`` whose ``index`` matches.

    Args:
        audioDeviceList: Devices to search; each must expose an ``index``
            attribute.
        index: Device index to look for.

    Returns:
        The first matching device, or ``None`` when no device has that index
        (including when the list is empty).
    """
    # Stop at the first hit instead of materialising every match.
    return next((x for x in audioDeviceList if x.index == index), None)
def serverLocal(self, _vc):
vc: VoiceChanger = _vc
@ -131,12 +143,28 @@ class VoiceChanger:
else:
sd._terminate()
sd._initialize()
if currentInputDeviceId != vc.settings.serverInputDeviceId:
sd.default.device[0] = vc.settings.serverInputDeviceId
currentInputDeviceId = vc.settings.serverInputDeviceId
if currentOutputDeviceId != vc.settings.serverOutputDeviceId:
sd.default.device[1] = vc.settings.serverOutputDeviceId
currentOutputDeviceId = vc.settings.serverOutputDeviceId
sd.default.device[0] = vc.settings.serverInputDeviceId
currentInputDeviceId = vc.settings.serverInputDeviceId
sd.default.device[1] = vc.settings.serverOutputDeviceId
currentOutputDeviceId = vc.settings.serverOutputDeviceId
currentInputChannelNum = vc.settings.serverAudioInputDevices
serverInputAudioDevice = self.getServerAudioDevice(
vc.settings.serverAudioInputDevices, currentInputDeviceId
)
serverOutputAudioDevice = self.getServerAudioDevice(
vc.settings.serverAudioOutputDevices, currentOutputDeviceId
)
print(serverInputAudioDevice, serverOutputAudioDevice)
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
time.sleep(2)
print("serverInputAudioDevice or serverOutputAudioDevice is None")
continue
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
currentOutputChannelNum = serverInputAudioDevice.maxOutputChannels
vc.settings.serverInputAudioSampleRate = (
self.voiceChanger.get_processing_sampling_rate()
@ -144,14 +172,16 @@ class VoiceChanger:
currentInputSampleRate = vc.settings.serverInputAudioSampleRate
currentInputChunkNum = vc.settings.serverReadChunkSize
block_frame = currentInputChunkNum * 128
try:
with sd.Stream(
callback=self.audio_callback,
blocksize=block_frame,
samplerate=currentInputSampleRate,
dtype="float32",
channels=1,
channels=[currentInputChannelNum, currentOutputChannelNum],
):
print()
while (
vc.settings.serverAudioStated == 1
and currentInputDeviceId == vc.settings.serverInputDeviceId
@ -182,8 +212,7 @@ class VoiceChanger:
)
except Exception as e:
print(e)
print()
print("[Voice Changer] ex:", e)
time.sleep(2)
def __init__(self, params: VoiceChangerParams):