WIP:refactoring

This commit is contained in:
wataru 2023-06-16 01:49:49 +09:00
parent be42bb682d
commit 9806ce2f3d
4 changed files with 84 additions and 183 deletions

View File

@ -15,7 +15,11 @@ class ServerAudioDevice:
def list_audio_device():
try:
audioDeviceList = sd.query_devices()
except Exception as e:
print("[Voice Changer] ex: query_devices", e)
return [], []
inputAudioDeviceList = [d for d in audioDeviceList if d["max_input_channels"] > 0]
outputAudioDeviceList = [d for d in audioDeviceList if d["max_output_channels"] > 0]

View File

@ -4,34 +4,66 @@ import numpy as np
import librosa
import sounddevice as sd
from dataclasses import dataclass, asdict, field
from voice_changer.Local.AudioDeviceList import ServerAudioDevice
from voice_changer.VoiceChanger import VoiceChanger
from voice_changer.VoiceChangerManager import VoiceChangerManager
from voice_changer.utils.Timer import Timer
@dataclass()
class ServerDeviceSettings:
    """Settings for audio I/O performed on the server's own sound devices.

    Every field has a default, so ``ServerDeviceSettings()`` is a valid,
    "server audio disabled" configuration.
    """

    # On/off switches are stored as ints rather than bools: 0 = off, 1 = on.
    enableServerAudio: int = 0
    # Spelling ("Stated", presumably meant as "Started") is kept as-is because
    # other code refers to the field by exactly this name.
    serverAudioStated: int = 0

    # Sample rates used for the server-side input and output streams.
    serverInputAudioSampleRate: int = 44100
    serverOutputAudioSampleRate: int = 44100

    # Device indices; -1 means no device has been selected (serverLocal idles
    # while the input device id is -1).
    serverInputDeviceId: int = -1
    serverOutputDeviceId: int = -1

    # Chunk count; serverLocal multiplies it by 128 to get the stream blocksize.
    serverReadChunkSize: int = 256

    # Multipliers applied to the input and output buffers in audio_callback.
    serverInputAudioGain: float = 1.0
    serverOutputAudioGain: float = 1.0
# Field names of ServerDeviceSettings grouped by value type.
# NOTE(review): presumably used to decide which settings may be updated at
# runtime and how to cast the incoming value -- confirm against the caller.
EditableServerDeviceSettings = {
    # Fields holding int values.
    "intData": [
        "enableServerAudio",
        "serverAudioStated",
        "serverInputAudioSampleRate",
        "serverOutputAudioSampleRate",
        "serverInputDeviceId",
        "serverOutputDeviceId",
        "serverReadChunkSize",
    ],
    # Fields holding float values.
    "floatData": [
        "serverInputAudioGain",
        "serverOutputAudioGain",
    ],
}
class ServerDevice:
# NOTE(review): two constructor definitions appear back to back here. This
# listing looks like a flattened diff whose +/- markers and indentation were
# lost; the first definition is presumably the removed one and the second its
# replacement -- confirm against the repository before relying on either.
# Constructor taking no arguments: starts with no VoiceChanger attached.
def __init__(self):
self.voiceChanger: VoiceChanger | None = None
pass
# Constructor taking the owning manager: creates a fresh ServerDeviceSettings
# with default values and keeps a reference to the manager, through which the
# other methods reach the current VoiceChanger.
def __init__(self, voiceChangerManager: VoiceChangerManager):
self.settings = ServerDeviceSettings()
self.voiceChangerManager: VoiceChangerManager = voiceChangerManager
# Stream callback (handed to sd.Stream as callback= in serverLocal): runs one
# block of input audio through the voice changer and writes the converted
# audio into outdata in place.
# NOTE(review): this listing looks like a flattened diff. Lines that go through
# self.voiceChanger appear to be the pre-refactor versions, each followed by
# its replacement going through self.voiceChangerManager / self.settings --
# confirm against the repository.
def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
if self.voiceChanger is None:
if self.voiceChangerManager.voiceChanger is None:
print("[Voice Changer] voiceChanger is None")
# Early exit: outdata is not written on this path.
return
try:
# Apply the configured input gain before conversion.
indata = indata * self.voiceChanger.settings.serverInputAudioGain
indata = indata * self.settings.serverInputAudioGain
with Timer("all_inference_time") as t:
# Mix the input block down to mono and scale it by 32768.0.
unpackedData = librosa.to_mono(indata.T) * 32768.0
# The returned `times` rebinds (shadows) the callback's `times` parameter.
out_wav, times = self.voiceChanger.on_request(unpackedData)
out_wav, times = self.voiceChangerManager.voiceChanger.on_request(unpackedData)
# Duplicate the mono result across every output channel and scale by 1/32768.0.
outputChunnels = outdata.shape[1]
outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
# Apply the configured output gain in place.
outdata[:] = outdata * self.voiceChanger.settings.serverOutputAudioGain
outdata[:] = outdata * self.settings.serverOutputAudioGain
# Overall elapsed time first, followed by the timings returned by on_request.
all_inference_time = t.secs
performance = [all_inference_time] + times
# Hand the raw timings to emitTo when one is set, then store them multiplied
# by 1000 and rounded (presumably seconds -> milliseconds -- TODO confirm).
if self.voiceChanger.emitTo is not None:
self.voiceChanger.emitTo(performance)
self.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
if self.voiceChangerManager.voiceChanger.emitTo is not None:
self.voiceChangerManager.voiceChanger.emitTo(performance)
self.voiceChangerManager.voiceChanger.settings.performance = [round(x * 1000) for x in performance]
# Any exception is printed and swallowed rather than raised to the caller.
except Exception as e:
print("[Voice Changer] ex:", e)
@ -42,31 +74,26 @@ class ServerDevice:
else:
return None
def serverLocal(self, _vc: VoiceChanger):
self.voiceChanger = _vc
vc = self.voiceChanger
def serverLocal(self):
currentInputDeviceId = -1
currentModelSamplingRate = -1
currentOutputDeviceId = -1
currentInputChunkNum = -1
while True:
if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc is None:
vc.settings.inputSampleRate = 48000
if self.settings.serverAudioStated == 0 or self.settings.serverInputDeviceId == -1 or self.voiceChangerManager is None:
self.voiceChangerManager.voiceChanger.settings.inputSampleRate = 48000
time.sleep(2)
else:
sd._terminate()
sd._initialize()
sd.default.device[0] = vc.settings.serverInputDeviceId
currentInputDeviceId = vc.settings.serverInputDeviceId
sd.default.device[1] = vc.settings.serverOutputDeviceId
currentOutputDeviceId = vc.settings.serverOutputDeviceId
sd.default.device[0] = self.settings.serverInputDeviceId
currentInputDeviceId = self.settings.serverInputDeviceId
sd.default.device[1] = self.settings.serverOutputDeviceId
currentOutputDeviceId = self.settings.serverOutputDeviceId
currentInputChannelNum = vc.settings.serverAudioInputDevices
serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
serverInputAudioDevice = self.getServerAudioDevice(self.voiceChangerManager.serverAudioInputDevices, currentInputDeviceId)
serverOutputAudioDevice = self.getServerAudioDevice(self.voiceChangerManager.serverAudioOutputDevices, currentOutputDeviceId)
print(serverInputAudioDevice, serverOutputAudioDevice)
if serverInputAudioDevice is None or serverOutputAudioDevice is None:
time.sleep(2)
@ -76,12 +103,12 @@ class ServerDevice:
currentInputChannelNum = serverInputAudioDevice.maxInputChannels
currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
currentInputChunkNum = vc.settings.serverReadChunkSize
currentInputChunkNum = self.settings.serverReadChunkSize
block_frame = currentInputChunkNum * 128
# sample rate precheck(alsa cannot use 40000?)
try:
currentModelSamplingRate = self.voiceChanger.voiceChangerModel.get_processing_sampling_rate()
currentModelSamplingRate = self.voiceChangerManager.voiceChanger.voiceChangerModel.get_processing_sampling_rate()
except Exception as e:
print("[Voice Changer] ex: get_processing_sampling_rate", e)
continue
@ -94,38 +121,38 @@ class ServerDevice:
channels=[currentInputChannelNum, currentOutputChannelNum],
):
pass
vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
vc.settings.inputSampleRate = currentModelSamplingRate
print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
self.settings.serverInputAudioSampleRate = currentModelSamplingRate
self.voiceChangerManager.voiceChanger.settings.inputSampleRate = currentModelSamplingRate
print(f"[Voice Changer] sample rate {self.settings.serverInputAudioSampleRate}")
except Exception as e:
print(
"[Voice Changer] ex: fallback to device default samplerate",
e,
)
vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
self.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
self.voiceChangerManager.voiceChanger.settings.inputSampleRate = self.settings.serverInputAudioSampleRate
# main loop
try:
with sd.Stream(
callback=self.audio_callback,
blocksize=block_frame,
samplerate=vc.settings.serverInputAudioSampleRate,
samplerate=self.settings.serverInputAudioSampleRate,
dtype="float32",
channels=[currentInputChannelNum, currentOutputChannelNum],
):
while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.voiceChangerModel.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
while self.settings.serverAudioStated == 1 and currentInputDeviceId == self.settings.serverInputDeviceId and currentOutputDeviceId == self.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChangerManager.voiceChanger.voiceChangerModel.get_processing_sampling_rate() and currentInputChunkNum == self.settings.serverReadChunkSize:
time.sleep(2)
print(
"[Voice Changer] server audio",
vc.settings.performance,
self.voiceChangerManager.settings.performance,
)
print(
"[Voice Changer] info:",
vc.settings.serverAudioStated,
self.settings.serverAudioStated,
currentInputDeviceId,
currentOutputDeviceId,
vc.settings.serverInputAudioSampleRate,
self.settings.serverInputAudioSampleRate,
currentInputChunkNum,
)

View File

@ -51,22 +51,7 @@ class VoiceChangerSettings:
crossFadeOverlapSize: int = 4096
recordIO: int = 0 # 0:off, 1:on
serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
enableServerAudio: int = 0 # 0:off, 1:on
serverAudioStated: int = 0 # 0:off, 1:on
# serverInputAudioSampleRate: int = 48000
# serverOutputAudioSampleRate: int = 48000
serverInputAudioSampleRate: int = 44100
serverOutputAudioSampleRate: int = 44100
# serverInputAudioBufferSize: int = 1024 * 24
# serverOutputAudioBufferSize: int = 1024 * 24
serverInputDeviceId: int = -1
serverOutputDeviceId: int = -1
serverReadChunkSize: int = 256
serverInputAudioGain: float = 1.0
serverOutputAudioGain: float = 1.0
performance: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
# ↓mutableな物だけ列挙
@ -75,23 +60,12 @@ class VoiceChangerSettings:
"inputSampleRate",
"crossFadeOverlapSize",
"recordIO",
"enableServerAudio",
"serverAudioStated",
"serverInputAudioSampleRate",
"serverOutputAudioSampleRate",
# "serverInputAudioBufferSize",
# "serverOutputAudioBufferSize",
"serverInputDeviceId",
"serverOutputDeviceId",
"serverReadChunkSize",
]
)
floatData: list[str] = field(
default_factory=lambda: [
"crossFadeOffsetRate",
"crossFadeEndRate",
"serverInputAudioGain",
"serverOutputAudioGain",
]
)
strData: list[str] = field(default_factory=lambda: [])
@ -108,120 +82,6 @@ class VoiceChanger:
# emitTo = None
# def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
# try:
# indata = indata * self.settings.serverInputAudioGain
# with Timer("all_inference_time") as t:
# unpackedData = librosa.to_mono(indata.T) * 32768.0
# out_wav, times = self.on_request(unpackedData)
# outputChunnels = outdata.shape[1]
# outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
# outdata[:] = outdata * self.settings.serverOutputAudioGain
# all_inference_time = t.secs
# performance = [all_inference_time] + times
# if self.emitTo is not None:
# self.emitTo(performance)
# self.settings.performance = [round(x * 1000) for x in performance]
# except Exception as e:
# print("[Voice Changer] ex:", e)
# def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
# serverAudioDevice = [x for x in audioDeviceList if x.index == index]
# if len(serverAudioDevice) > 0:
# return serverAudioDevice[0]
# else:
# return None
# def serverLocal(self, _vc):
# vc: VoiceChanger = _vc
# currentInputDeviceId = -1
# currentModelSamplingRate = -1
# currentOutputDeviceId = -1
# currentInputChunkNum = -1
# while True:
# if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
# vc.settings.inputSampleRate = 48000
# time.sleep(2)
# else:
# sd._terminate()
# sd._initialize()
# sd.default.device[0] = vc.settings.serverInputDeviceId
# currentInputDeviceId = vc.settings.serverInputDeviceId
# sd.default.device[1] = vc.settings.serverOutputDeviceId
# currentOutputDeviceId = vc.settings.serverOutputDeviceId
# currentInputChannelNum = vc.settings.serverAudioInputDevices
# serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
# serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
# print(serverInputAudioDevice, serverOutputAudioDevice)
# if serverInputAudioDevice is None or serverOutputAudioDevice is None:
# time.sleep(2)
# print("serverInputAudioDevice or serverOutputAudioDevice is None")
# continue
# currentInputChannelNum = serverInputAudioDevice.maxInputChannels
# currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
# currentInputChunkNum = vc.settings.serverReadChunkSize
# block_frame = currentInputChunkNum * 128
# # sample rate precheck(alsa cannot use 40000?)
# try:
# currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
# except Exception as e:
# print("[Voice Changer] ex: get_processing_sampling_rate", e)
# continue
# try:
# with sd.Stream(
# callback=self.audio_callback,
# blocksize=block_frame,
# samplerate=currentModelSamplingRate,
# dtype="float32",
# channels=[currentInputChannelNum, currentOutputChannelNum],
# ):
# pass
# vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
# vc.settings.inputSampleRate = currentModelSamplingRate
# print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
# except Exception as e:
# print(
# "[Voice Changer] ex: fallback to device default samplerate",
# e,
# )
# vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
# vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
# # main loop
# try:
# with sd.Stream(
# callback=self.audio_callback,
# blocksize=block_frame,
# samplerate=vc.settings.serverInputAudioSampleRate,
# dtype="float32",
# channels=[currentInputChannelNum, currentOutputChannelNum],
# ):
# while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
# time.sleep(2)
# print(
# "[Voice Changer] server audio",
# self.settings.performance,
# )
# print(
# "[Voice Changer] info:",
# vc.settings.serverAudioStated,
# currentInputDeviceId,
# currentOutputDeviceId,
# vc.settings.serverInputAudioSampleRate,
# currentInputChunkNum,
# )
# except Exception as e:
# print("[Voice Changer] ex:", e)
# time.sleep(2)
def __init__(self, params: VoiceChangerParams, slotIndex: int):
# 初期化
self.settings = VoiceChangerSettings()
@ -238,9 +98,9 @@ class VoiceChanger:
self.ioRecorder: IORecorder | None = None
self.sola_buffer: AudioInOut | None = None
audioinput, audiooutput = list_audio_device()
self.settings.serverAudioInputDevices = audioinput
self.settings.serverAudioOutputDevices = audiooutput
# audioinput, audiooutput = list_audio_device()
# self.settings.serverAudioInputDevices = audioinput
# self.settings.serverAudioOutputDevices = audiooutput
self.slotIndex = slotIndex
self.slotInfo = loadSlotInfo(params.model_dir, self.slotIndex)

View File

@ -3,6 +3,7 @@ import threading
from data.ModelSample import ModelSamples
from data.ModelSlot import ModelSlots, loadSlotInfo
from utils.downloader.SampleDownloader import downloadSample, getSampleInfos
from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
from voice_changer.Local.ServerDevice import ServerDevice
from voice_changer.RVC.ModelSlotGenerator import setSlotAsRVC
@ -26,6 +27,7 @@ class GPUInfo:
@dataclass()
class VoiceChangerManagerSettings:
slotIndex: int
intData: list[str] = field(default_factory=lambda: ["slotIndex"])
@ -43,6 +45,14 @@ class VoiceChangerManager(object):
self.sampleModels: list[ModelSamples] = getSampleInfos(self.params.sample_mode)
self.gpus: list[GPUInfo] = self._get_gpuInfos()
audioinput, audiooutput = list_audio_device()
self.serverAudioInputDevices: list[ServerAudioDevice] = audioinput
self.serverAudioOutputDevices: list[ServerAudioDevice] = audiooutput
# ServerDevice
thread = threading.Thread(target=self.serverDevice.serverLocal, args=(self,))
thread.start()
def _get_gpuInfos(self):
devCount = torch.cuda.device_count()
gpus = []
@ -63,8 +73,6 @@ class VoiceChangerManager(object):
print(f"VoiceChanger Initialized (GPU_NUM:{gpu_num}, mps_enabled:{mps_enabled})")
cls._instance.voiceChanger = VoiceChanger(params, cls._instance.settings.slotIndex)
thread = threading.Thread(target=cls._instance.serverDevice.serverLocal, args=(cls._instance.voiceChanger,))
thread.start()
cls._instance.voiceChanger.prepareModel()
return cls._instance
@ -94,6 +102,8 @@ class VoiceChangerManager(object):
data["slotInfos"] = slotInfos
data["gpus"] = self.gpus
data["sampleModels"] = self.sampleModels
data["serverAudioInputDevices"] = self.serverAudioInputDevices
data["serverAudioOutputDevices"] = self.serverAudioOutputDevices
data["status"] = "OK"
if hasattr(self, "voiceChanger"):