mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 21:45:00 +03:00
fix: server device mode channel num
This commit is contained in:
parent
ea3c5b5740
commit
6fecb5f908
@ -1,10 +1,5 @@
|
|||||||
import pyaudio
|
import sounddevice as sd
|
||||||
|
|
||||||
# import json
|
|
||||||
|
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from const import ServerAudioDeviceTypes
|
from const import ServerAudioDeviceTypes
|
||||||
|
|
||||||
|
|
||||||
@ -14,44 +9,42 @@ class ServerAudioDevice:
|
|||||||
index: int = 0
|
index: int = 0
|
||||||
name: str = ""
|
name: str = ""
|
||||||
hostAPI: str = ""
|
hostAPI: str = ""
|
||||||
|
maxInputChannels: int = 0
|
||||||
|
maxOutputChannels: int = 0
|
||||||
|
|
||||||
|
|
||||||
def list_audio_device():
    """Enumerate the audio devices reported by ``sounddevice``.

    Returns:
        A ``(input_devices, output_devices)`` tuple of ``ServerAudioDevice``
        lists. A device that has both input and output channels appears in
        both lists, once with kind ``audioinput`` and once with kind
        ``audiooutput``.
    """
    hostapis = sd.query_hostapis()
    # Keep each device's position so it can stand in for a missing "index".
    devices = list(enumerate(sd.query_devices()))

    serverAudioInputDevices = [
        _to_server_audio_device(
            ServerAudioDeviceTypes.audioinput, position, device, hostapis
        )
        for position, device in devices
        if device["max_input_channels"] > 0
    ]
    serverAudioOutputDevices = [
        _to_server_audio_device(
            ServerAudioDeviceTypes.audiooutput, position, device, hostapis
        )
        for position, device in devices
        if device["max_output_channels"] > 0
    ]
    return serverAudioInputDevices, serverAudioOutputDevices


def _to_server_audio_device(kind, position, device, hostapis):
    """Build a ``ServerAudioDevice`` of ``kind`` from one sounddevice device dict."""
    return ServerAudioDevice(
        kind=kind,
        # NOTE(review): older sounddevice releases appear not to include an
        # "index" key in the device dict; the position in the device list is
        # used as the fallback -- confirm against the pinned version.
        index=device.get("index", position),
        name=device["name"],
        hostAPI=hostapis[device["hostapi"]]["name"],
        maxInputChannels=device["max_input_channels"],
        maxOutputChannels=device["max_output_channels"],
    )
|
|
||||||
|
@ -102,17 +102,29 @@ class VoiceChanger:
|
|||||||
def audio_callback(
    self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
):
    """Process one captured audio block and write the result to ``outdata``.

    The input block is down-mixed with ``librosa.to_mono``, scaled by 32768
    and passed to ``self.on_request``. The returned waveform is scaled back
    by 1/32768 and copied to every output channel. Timing figures are stored
    in ``self.settings.performance`` in milliseconds.

    Args:
        indata: Captured block; transposed before the mono down-mix.
        outdata: Output buffer; its second dimension is the channel count.
        frames: Unused.
        times: Unused (stream timing info supplied by the caller).
        status: Unused.

    Any exception is reported on stdout and swallowed. If it happened before
    the output buffer was written, the buffer is zero-filled so the device
    plays silence rather than whatever the buffer previously held.
    """
    output_written = False
    try:
        with Timer("all_inference_time") as t:
            unpackedData = librosa.to_mono(indata.T) * 32768.0
            # Use a distinct name: the original rebound the `times` parameter.
            out_wav, processing_times = self.on_request(unpackedData)
            output_channels = outdata.shape[1]
            # Duplicate each mono sample across all output channels.
            outdata[:] = (
                np.repeat(out_wav, output_channels).reshape(-1, output_channels)
                / 32768.0
            )
            output_written = True
        # NOTE(review): assumes Timer publishes `secs` once the with-block
        # exits; the original indentation was lost -- confirm placement.
        all_inference_time = t.secs
        performance = [all_inference_time] + processing_times
        self.settings.performance = [round(x * 1000) for x in performance]
    except Exception as e:
        print("[Voice Changer] ex:", e)
        if not output_written:
            # The callback must always fill the whole output buffer.
            outdata.fill(0)
|
||||||
|
|
||||||
|
def getServerAudioDevice(
    self, audioDeviceList: "list[ServerAudioDevice]", index: int
):
    """Return the first device in ``audioDeviceList`` whose ``index`` matches.

    Args:
        audioDeviceList: Devices to search; ``None`` or an empty list is
            treated as "no devices".
        index: Device index to look for.

    Returns:
        The first matching device, or ``None`` when there is no match.
    """
    # Stop at the first hit instead of materialising every match.
    return next((x for x in audioDeviceList or () if x.index == index), None)
|
||||||
|
|
||||||
def serverLocal(self, _vc):
|
def serverLocal(self, _vc):
|
||||||
vc: VoiceChanger = _vc
|
vc: VoiceChanger = _vc
|
||||||
@ -131,12 +143,28 @@ class VoiceChanger:
|
|||||||
else:
|
else:
|
||||||
sd._terminate()
|
sd._terminate()
|
||||||
sd._initialize()
|
sd._initialize()
|
||||||
if currentInputDeviceId != vc.settings.serverInputDeviceId:
|
|
||||||
sd.default.device[0] = vc.settings.serverInputDeviceId
|
sd.default.device[0] = vc.settings.serverInputDeviceId
|
||||||
currentInputDeviceId = vc.settings.serverInputDeviceId
|
currentInputDeviceId = vc.settings.serverInputDeviceId
|
||||||
if currentOutputDeviceId != vc.settings.serverOutputDeviceId:
|
sd.default.device[1] = vc.settings.serverOutputDeviceId
|
||||||
sd.default.device[1] = vc.settings.serverOutputDeviceId
|
currentOutputDeviceId = vc.settings.serverOutputDeviceId
|
||||||
currentOutputDeviceId = vc.settings.serverOutputDeviceId
|
|
||||||
|
currentInputChannelNum = vc.settings.serverAudioInputDevices
|
||||||
|
|
||||||
|
serverInputAudioDevice = self.getServerAudioDevice(
|
||||||
|
vc.settings.serverAudioInputDevices, currentInputDeviceId
|
||||||
|
)
|
||||||
|
serverOutputAudioDevice = self.getServerAudioDevice(
|
||||||
|
vc.settings.serverAudioOutputDevices, currentOutputDeviceId
|
||||||
|
)
|
||||||
|
print(serverInputAudioDevice, serverOutputAudioDevice)
|
||||||
|
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
|
||||||
|
time.sleep(2)
|
||||||
|
print("serverInputAudioDevice or serverOutputAudioDevice is None")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Each side of the stream takes its channel count from its own device; the
# output count was previously read from the input device by mistake.
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
|
||||||
|
|
||||||
vc.settings.serverInputAudioSampleRate = (
|
vc.settings.serverInputAudioSampleRate = (
|
||||||
self.voiceChanger.get_processing_sampling_rate()
|
self.voiceChanger.get_processing_sampling_rate()
|
||||||
@ -144,14 +172,16 @@ class VoiceChanger:
|
|||||||
currentInputSampleRate = vc.settings.serverInputAudioSampleRate
|
currentInputSampleRate = vc.settings.serverInputAudioSampleRate
|
||||||
currentInputChunkNum = vc.settings.serverReadChunkSize
|
currentInputChunkNum = vc.settings.serverReadChunkSize
|
||||||
block_frame = currentInputChunkNum * 128
|
block_frame = currentInputChunkNum * 128
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with sd.Stream(
|
with sd.Stream(
|
||||||
callback=self.audio_callback,
|
callback=self.audio_callback,
|
||||||
blocksize=block_frame,
|
blocksize=block_frame,
|
||||||
samplerate=currentInputSampleRate,
|
samplerate=currentInputSampleRate,
|
||||||
dtype="float32",
|
dtype="float32",
|
||||||
channels=1,
|
channels=[currentInputChannelNum, currentOutputChannelNum],
|
||||||
):
|
):
|
||||||
|
print()
|
||||||
while (
|
while (
|
||||||
vc.settings.serverAudioStated == 1
|
vc.settings.serverAudioStated == 1
|
||||||
and currentInputDeviceId == vc.settings.serverInputDeviceId
|
and currentInputDeviceId == vc.settings.serverInputDeviceId
|
||||||
@ -182,8 +212,7 @@ class VoiceChanger:
|
|||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print("[Voice Changer] ex:", e)
|
||||||
print()
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
def __init__(self, params: VoiceChangerParams):
|
def __init__(self, params: VoiceChangerParams):
|
||||||
|
Loading…
Reference in New Issue
Block a user