Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-03-12 19:07:49 +03:00)

Commit 8a8386640d (parent 3c1d87751e)
WIP: separate server device
server/voice_changer/Local/AudioDeviceList.py
@@ -25,8 +25,8 @@ def list_audio_device():
     # print("output:", outputDeviceList)
     # print("hostapis", hostapis)

-    serverAudioInputDevices = []
-    serverAudioOutputDevices = []
+    serverAudioInputDevices: list[ServerAudioDevice] = []
+    serverAudioOutputDevices: list[ServerAudioDevice] = []
     for d in inputAudioDeviceList:
         serverInputAudioDevice: ServerAudioDevice = ServerAudioDevice(
             kind=ServerAudioDeviceTypes.audioinput,
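For orientation only: the ServerAudioDevice entries held by these newly annotated lists are not defined in this diff. Judging from how they are used elsewhere in the commit (index, kind, maxInputChannels, maxOutputChannels, default_samplerate), the dataclass looks roughly like the sketch below; the enum shape and the default values are assumptions, not code from the repo.

from dataclasses import dataclass
from enum import Enum


class ServerAudioDeviceTypes(Enum):
    # member names assumed; only "audioinput" is referenced in this diff
    audioinput = "audioinput"
    audiooutput = "audiooutput"


@dataclass
class ServerAudioDevice:
    kind: ServerAudioDeviceTypes         # input vs. output device
    index: int                           # sounddevice device index, matched in getServer*AudioDevice
    maxInputChannels: int = 0            # used to set sd.default.channels[0]
    maxOutputChannels: int = 0           # used to set sd.default.channels[1]
    default_samplerate: float = 44100.0  # fallback when the model sample rate is rejected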
server/voice_changer/Local/ServerDevice.py (new file, +190 lines)

from dataclasses import dataclass, asdict

import numpy as np

from voice_changer.Local.AudioDeviceList import list_audio_device
import time
import sounddevice as sd
from voice_changer.utils.Timer import Timer
import librosa

from voice_changer.utils.VoiceChangerModel import AudioInOut
from typing import Protocol


@dataclass
class ServerDeviceSettings:
    enableServerAudio: int = 0  # 0:off, 1:on
    serverAudioStated: int = 0  # 0:off, 1:on
    serverInputAudioSampleRate: int = 44100
    serverOutputAudioSampleRate: int = 44100
    serverInputDeviceId: int = -1
    serverOutputDeviceId: int = -1
    serverReadChunkSize: int = 256
    serverInputAudioGain: float = 1.0
    serverOutputAudioGain: float = 1.0


EditableServerDeviceSettings = {
    "intData": [
        "enableServerAudio",
        "serverAudioStated",
        "serverInputAudioSampleRate",
        "serverOutputAudioSampleRate",
        "serverInputDeviceId",
        "serverOutputDeviceId",
        "serverReadChunkSize",
    ],
    "floatData": [
        "serverInputAudioGain",
        "serverOutputAudioGain",
    ],
}


class ServerDeviceCallbacks(Protocol):
    def on_request(self, unpackedData: AudioInOut):
        ...

    def emitTo(self, performance: list[float]):
        ...

    def get_processing_sampling_rate(self):
        ...

    def setSamplingRate(self, sr: int):
        ...


class ServerDevice:
    def __init__(self, serverDeviceCallbacks: ServerDeviceCallbacks):
        self.settings = ServerDeviceSettings()
        self.serverDeviceCallbacks = serverDeviceCallbacks

    def getServerInputAudioDevice(self, index: int):
        audioinput, _audiooutput = list_audio_device()
        serverAudioDevice = [x for x in audioinput if x.index == index]
        if len(serverAudioDevice) > 0:
            return serverAudioDevice[0]
        else:
            return None

    def getServerOutputAudioDevice(self, index: int):
        _audioinput, audiooutput = list_audio_device()
        serverAudioDevice = [x for x in audiooutput if x.index == index]
        if len(serverAudioDevice) > 0:
            return serverAudioDevice[0]
        else:
            return None

    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
        try:
            indata = indata * self.settings.serverInputAudioGain
            with Timer("all_inference_time") as t:
                unpackedData = librosa.to_mono(indata.T) * 32768.0
                out_wav, times = self.serverDeviceCallbacks.on_request(unpackedData)
                outputChunnels = outdata.shape[1]
                outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
                outdata[:] = outdata * self.settings.serverOutputAudioGain
            all_inference_time = t.secs
            self.performance = [all_inference_time] + times
            self.serverDeviceCallbacks.emitTo(self.performance)
            self.performance = [round(x * 1000) for x in self.performance]
        except Exception as e:
            print("[Voice Changer] ex:", e)

    def start(self):
        # currentInputDeviceId = -1
        # currentOutputDeviceId = -1
        # currentInputChunkNum = -1
        currentModelSamplingRate = -1
        while True:
            if self.settings.serverAudioStated == 0 or self.settings.serverInputDeviceId == -1:
                # self.settings.inputSampleRate = 48000
                time.sleep(2)
            else:
                sd._terminate()
                sd._initialize()

                sd.default.device[0] = self.settings.serverInputDeviceId
                # currentInputDeviceId = self.settings.serverInputDeviceId
                sd.default.device[1] = self.settings.serverOutputDeviceId
                # currentOutputDeviceId = self.settings.serverOutputDeviceId

                serverInputAudioDevice = self.getServerInputAudioDevice(sd.default.device[0])
                serverOutputAudioDevice = self.getServerOutputAudioDevice(sd.default.device[1])
                print(serverInputAudioDevice, serverOutputAudioDevice)
                if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                    time.sleep(2)
                    print("serverInputAudioDevice or serverOutputAudioDevice is None")
                    continue

                # currentInputChannelNum = serverInputAudioDevice.maxInputChannels
                # currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
                sd.default.channels[0] = serverInputAudioDevice.maxInputChannels
                sd.default.channels[1] = serverOutputAudioDevice.maxOutputChannels

                currentInputChunkNum = self.settings.serverReadChunkSize
                block_frame = currentInputChunkNum * 128

                # sample rate precheck (alsa cannot use 40000?)
                try:
                    currentModelSamplingRate = self.serverDeviceCallbacks.get_processing_sampling_rate()
                except Exception as e:
                    print("[Voice Changer] ex: get_processing_sampling_rate", e)
                    continue
                try:
                    with sd.Stream(
                        callback=self.audio_callback,
                        blocksize=block_frame,
                        # samplerate=currentModelSamplingRate,
                        dtype="float32",
                        # channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        pass
                    self.settings.serverInputAudioSampleRate = currentModelSamplingRate
                    self.serverDeviceCallbacks.setSamplingRate(currentModelSamplingRate)
                    print(f"[Voice Changer] sample rate {self.settings.serverInputAudioSampleRate}")
                except Exception as e:
                    print("[Voice Changer] ex: fallback to device default samplerate", e)
                    print("[Voice Changer] device default samplerate", serverInputAudioDevice.default_samplerate)
                    self.settings.serverInputAudioSampleRate = round(serverInputAudioDevice.default_samplerate)
                    self.serverDeviceCallbacks.setSamplingRate(round(serverInputAudioDevice.default_samplerate))

                sd.default.samplerate = self.settings.serverInputAudioSampleRate
                sd.default.blocksize = block_frame
                # main loop
                try:
                    with sd.Stream(
                        callback=self.audio_callback,
                        # blocksize=block_frame,
                        # samplerate=vc.settings.serverInputAudioSampleRate,
                        dtype="float32",
                        # channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
                        while self.settings.serverAudioStated == 1 and sd.default.device[0] == self.settings.serverInputDeviceId and sd.default.device[1] == self.settings.serverOutputDeviceId and currentModelSamplingRate == self.serverDeviceCallbacks.get_processing_sampling_rate() and currentInputChunkNum == self.settings.serverReadChunkSize:
                            time.sleep(2)
                            print(
                                "[Voice Changer] server audio",
                                self.performance,
                            )
                            print(f"[Voice Changer] started:{self.settings.serverAudioStated}, input:{sd.default.device[0]}, output:{sd.default.device[1]}, mic_sr:{self.settings.serverInputAudioSampleRate}, model_sr:{currentModelSamplingRate}, chunk:{currentInputChunkNum}, ch:[{sd.default.channels}]")

                except Exception as e:
                    print("[Voice Changer] ex:", e)
                    time.sleep(2)

    def get_info(self):
        data = asdict(self.settings)
        audioinput, audiooutput = list_audio_device()
        data["serverAudioInputDevices"] = audioinput
        data["serverAudioOutputDevices"] = audiooutput

        return data

    def update_settings(self, key: str, val: str | int | float):
        if key in EditableServerDeviceSettings["intData"]:
            setattr(self.settings, key, int(val))
        elif key in EditableServerDeviceSettings["floatData"]:
            setattr(self.settings, key, float(val))
        return self.get_info()
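For context, the intended way to drive this new module mirrors what VoiceChangerManager does later in this commit: a host object implements the ServerDeviceCallbacks protocol, hands itself to ServerDevice, and runs start() on a background thread. The minimal sketch below is not code from the repo; the EchoCallbacks class, its pass-through behavior, and the device indices are illustrative placeholders for whatever list_audio_device reports on a given machine.

import threading
import numpy as np

from voice_changer.Local.ServerDevice import ServerDevice


class EchoCallbacks:
    """Illustrative ServerDeviceCallbacks implementation that just echoes audio back."""

    def on_request(self, unpackedData):
        # Return the mono block unchanged plus an empty per-stage timing list.
        return unpackedData.astype(np.int16), []

    def emitTo(self, performance: list[float]):
        print("performance:", performance)

    def get_processing_sampling_rate(self):
        return 48000

    def setSamplingRate(self, sr: int):
        print("client input sample rate should now be", sr)


serverDevice = ServerDevice(EchoCallbacks())
threading.Thread(target=serverDevice.start, daemon=True).start()

# Audio only flows once the settings select devices and flip serverAudioStated on:
serverDevice.update_settings("serverInputDeviceId", 1)   # placeholder index
serverDevice.update_settings("serverOutputDeviceId", 3)  # placeholder index
serverDevice.update_settings("serverAudioStated", 1)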
server/voice_changer/VoiceChanger.py
@@ -11,7 +11,6 @@ import resampy


 from voice_changer.IORecorder import IORecorder
-from voice_changer.Local.AudioDeviceList import ServerAudioDevice, list_audio_device
 from voice_changer.utils.LoadModelParams import LoadModelParams

 from voice_changer.utils.Timer import Timer
@@ -26,10 +25,6 @@ from Exceptions import (
     VoiceChangerIsNotSelectedException,
 )
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
-import threading
-import time
-import sounddevice as sd
-import librosa

 STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
 STREAM_OUTPUT_FILE = os.path.join(TMP_DIR, "out.wav")
@@ -44,22 +39,7 @@ class VoiceChangerSettings:
     crossFadeOverlapSize: int = 4096

     recordIO: int = 0  # 0:off, 1:on
-    serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
-    serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])

-    enableServerAudio: int = 0  # 0:off, 1:on
-    serverAudioStated: int = 0  # 0:off, 1:on
-    # serverInputAudioSampleRate: int = 48000
-    # serverOutputAudioSampleRate: int = 48000
-    serverInputAudioSampleRate: int = 44100
-    serverOutputAudioSampleRate: int = 44100
-    # serverInputAudioBufferSize: int = 1024 * 24
-    # serverOutputAudioBufferSize: int = 1024 * 24
-    serverInputDeviceId: int = -1
-    serverOutputDeviceId: int = -1
-    serverReadChunkSize: int = 256
-    serverInputAudioGain: float = 1.0
-    serverOutputAudioGain: float = 1.0
     performance: list[int] = field(default_factory=lambda: [0, 0, 0, 0])

     # ↓ list only the mutable items
@@ -68,23 +48,12 @@ class VoiceChangerSettings:
             "inputSampleRate",
             "crossFadeOverlapSize",
             "recordIO",
-            "enableServerAudio",
-            "serverAudioStated",
-            "serverInputAudioSampleRate",
-            "serverOutputAudioSampleRate",
-            # "serverInputAudioBufferSize",
-            # "serverOutputAudioBufferSize",
-            "serverInputDeviceId",
-            "serverOutputDeviceId",
-            "serverReadChunkSize",
         ]
     )
     floatData: list[str] = field(
         default_factory=lambda: [
             "crossFadeOffsetRate",
             "crossFadeEndRate",
-            "serverInputAudioGain",
-            "serverOutputAudioGain",
         ]
     )
     strData: list[str] = field(default_factory=lambda: [])
@@ -101,120 +70,6 @@ class VoiceChanger:

     emitTo = None

-    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
-        try:
-            indata = indata * self.settings.serverInputAudioGain
-            with Timer("all_inference_time") as t:
-                unpackedData = librosa.to_mono(indata.T) * 32768.0
-                out_wav, times = self.on_request(unpackedData)
-                outputChunnels = outdata.shape[1]
-                outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
-                outdata[:] = outdata * self.settings.serverOutputAudioGain
-            all_inference_time = t.secs
-            performance = [all_inference_time] + times
-            if self.emitTo is not None:
-                self.emitTo(performance)
-            self.settings.performance = [round(x * 1000) for x in performance]
-        except Exception as e:
-            print("[Voice Changer] ex:", e)
-
-    def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
-        serverAudioDevice = [x for x in audioDeviceList if x.index == index]
-        if len(serverAudioDevice) > 0:
-            return serverAudioDevice[0]
-        else:
-            return None
-
-    def serverLocal(self, _vc):
-        vc: VoiceChanger = _vc
-
-        currentInputDeviceId = -1
-        currentModelSamplingRate = -1
-        currentOutputDeviceId = -1
-        currentInputChunkNum = -1
-        while True:
-            if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
-                vc.settings.inputSampleRate = 48000
-                time.sleep(2)
-            else:
-                sd._terminate()
-                sd._initialize()
-
-                sd.default.device[0] = vc.settings.serverInputDeviceId
-                currentInputDeviceId = vc.settings.serverInputDeviceId
-                sd.default.device[1] = vc.settings.serverOutputDeviceId
-                currentOutputDeviceId = vc.settings.serverOutputDeviceId
-
-                currentInputChannelNum = vc.settings.serverAudioInputDevices
-
-                serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
-                serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
-                print(serverInputAudioDevice, serverOutputAudioDevice)
-                if serverInputAudioDevice is None or serverOutputAudioDevice is None:
-                    time.sleep(2)
-                    print("serverInputAudioDevice or serverOutputAudioDevice is None")
-                    continue
-
-                currentInputChannelNum = serverInputAudioDevice.maxInputChannels
-                currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels
-
-                currentInputChunkNum = vc.settings.serverReadChunkSize
-                block_frame = currentInputChunkNum * 128
-
-                # sample rate precheck (alsa cannot use 40000?)
-                try:
-                    currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
-                except Exception as e:
-                    print("[Voice Changer] ex: get_processing_sampling_rate", e)
-                    continue
-                try:
-                    with sd.Stream(
-                        callback=self.audio_callback,
-                        blocksize=block_frame,
-                        samplerate=currentModelSamplingRate,
-                        dtype="float32",
-                        channels=[currentInputChannelNum, currentOutputChannelNum],
-                    ):
-                        pass
-                    vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
-                    vc.settings.inputSampleRate = currentModelSamplingRate
-                    print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
-                except Exception as e:
-                    print(
-                        "[Voice Changer] ex: fallback to device default samplerate",
-                        e,
-                    )
-                    vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
-                    vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
-
-                # main loop
-                try:
-                    with sd.Stream(
-                        callback=self.audio_callback,
-                        blocksize=block_frame,
-                        samplerate=vc.settings.serverInputAudioSampleRate,
-                        dtype="float32",
-                        channels=[currentInputChannelNum, currentOutputChannelNum],
-                    ):
-                        while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
-                            time.sleep(2)
-                            print(
-                                "[Voice Changer] server audio",
-                                self.settings.performance,
-                            )
-                            print(
-                                "[Voice Changer] info:",
-                                vc.settings.serverAudioStated,
-                                currentInputDeviceId,
-                                currentOutputDeviceId,
-                                vc.settings.serverInputAudioSampleRate,
-                                currentInputChunkNum,
-                            )
-
-                except Exception as e:
-                    print("[Voice Changer] ex:", e)
-                    time.sleep(2)
-
     def __init__(self, params: VoiceChangerParams):
         # initialization
         self.settings = VoiceChangerSettings()
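The audio_callback removed here is the one re-added in ServerDevice.py above: scale the float32 input by the input gain, mix down to mono on the int16 scale the model expects, then fan the mono result back out over the output channels and rescale. Both versions also size blocks as block_frame = serverReadChunkSize * 128, so the default of 256 gives 32768 frames per callback (about 0.74 s at 44.1 kHz). A self-contained sketch of just that conversion follows; the helper names are illustrative, not from the repo.

import numpy as np
import librosa


def to_model_input(indata: np.ndarray, input_gain: float) -> np.ndarray:
    # indata: float32 block of shape (frames, channels) in [-1.0, 1.0]
    mono = librosa.to_mono((indata * input_gain).T)  # shape (frames,)
    return mono * 32768.0  # int16 scale expected by on_request()


def to_device_output(out_wav: np.ndarray, n_channels: int, output_gain: float) -> np.ndarray:
    # out_wav: mono samples on the int16 scale returned by on_request()
    fanned = np.repeat(out_wav, n_channels).reshape(-1, n_channels)
    return fanned / 32768.0 * output_gain  # back to float32 [-1.0, 1.0]


block = np.zeros((256 * 128, 2), dtype=np.float32)  # one default-sized callback block
model_in = to_model_input(block, input_gain=1.0)
device_out = to_device_output(model_in, n_channels=2, output_gain=1.0)
assert device_out.shape == block.shape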
@@ -231,12 +86,6 @@ class VoiceChanger:
         self.prev_audio = np.zeros(4096)
         self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()

-        audioinput, audiooutput = list_audio_device()
-        self.settings.serverAudioInputDevices = audioinput
-        self.settings.serverAudioOutputDevices = audiooutput
-
-        thread = threading.Thread(target=self.serverLocal, args=(self,))
-        thread.start()
         print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")

     def switchModelType(self, modelType: ModelType):
@@ -375,6 +224,12 @@ class VoiceChanger:
         if hasattr(self, "sola_buffer") is True:
             del self.sola_buffer

+    def get_processing_sampling_rate(self):
+        if self.voiceChanger is None:
+            return 0
+        else:
+            return self.voiceChanger.get_processing_sampling_rate()
+
     # receivedData: tuple of short
     def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
         return self.on_request_sola(receivedData)
server/voice_changer/VoiceChangerManager.py
@@ -1,4 +1,5 @@
 import numpy as np
+from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
 from voice_changer.VoiceChanger import VoiceChanger
 from const import ModelType
 from voice_changer.utils.LoadModelParams import LoadModelParams
@@ -6,6 +7,7 @@ from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 from dataclasses import dataclass, asdict
 import torch
+import threading


 @dataclass()
@@ -22,15 +24,38 @@ class VoiceChangerManagerSettings:
     # intData: list[str] = field(default_factory=lambda: ["slotIndex"])


-class VoiceChangerManager(object):
+class VoiceChangerManager(ServerDeviceCallbacks):
     _instance = None

+    ############################
+    # ServerDeviceCallbacks
+    ############################
+    def on_request(self, unpackedData: AudioInOut):
+        return self.changeVoice(unpackedData)
+
+    def emitTo(self, performance: list[float]):
+        print("emit ", performance)
+
+    def get_processing_sampling_rate(self):
+        return self.voiceChanger.get_processing_sampling_rate()
+
+    def setSamplingRate(self, sr: int):
+        self.voiceChanger.settings.inputSampleRate = sr
+
+    ############################
+    # VoiceChangerManager
+    ############################
     def __init__(self, params: VoiceChangerParams):
         self.voiceChanger: VoiceChanger = None
         self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings(dummy=0)
         # collect static information
         self.gpus: list[GPUInfo] = self._get_gpuInfos()

+        self.serverDevice = ServerDevice(self)
+
+        thread = threading.Thread(target=self.serverDevice.start, args=())
+        thread.start()
+
     def _get_gpuInfos(self):
         devCount = torch.cuda.device_count()
         gpus = []
@@ -62,6 +87,9 @@ class VoiceChangerManager(object):

         data["status"] = "OK"

+        info = self.serverDevice.get_info()
+        data.update(info)
+
         if hasattr(self, "voiceChanger"):
             info = self.voiceChanger.get_info()
             data.update(info)
@@ -77,6 +105,7 @@ class VoiceChangerManager(object):
             return {"status": "ERROR", "msg": "no model loaded"}

     def update_settings(self, key: str, val: str | int | float):
+        self.serverDevice.update_settings(key, val)
         if hasattr(self, "voiceChanger"):
             self.voiceChanger.update_settings(key, val)
         else:
|
Loading…
x
Reference in New Issue
Block a user