Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-02-02 16:23:58 +03:00)
move getting gpu info function to manager
commit d7e541ffa7 (parent bdad75699e)
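The diff below removes ad-hoc CUDA device enumeration from VoiceChanger.get_info() (where it ran on every info request) and moves it into a new VoiceChangerManager._get_gpuInfos() that runs once in the manager's __init__. A standalone sketch of the relocated logic, assuming only that PyTorch is installed (on a CUDA-less machine it returns an empty list):

import torch

def get_gpu_infos() -> list:
    # Enumerate CUDA devices the same way the relocated helper does:
    # one dict per device with id, name, and total memory in bytes.
    gpus = []
    for dev_id in range(torch.cuda.device_count()):
        gpus.append({
            "id": dev_id,
            "name": torch.cuda.get_device_name(dev_id),
            "memory": torch.cuda.get_device_properties(dev_id).total_memory,
        })
    return gpus

if __name__ == "__main__":
    print(get_gpu_infos())  # e.g. [{'id': 0, 'name': 'NVIDIA ...', 'memory': ...}]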
client/demo/dist/index.html (vendored): 11 changes
@@ -1 +1,10 @@
-<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
+<!DOCTYPE html>
+<html style="width: 100%; height: 100%; overflow: hidden">
+<head>
+<meta charset="utf-8" />
+<title>Voice Changer Client Demo</title>
+<script defer src="index.js"></script></head>
+<body style="width: 100%; height: 100%; margin: 0px">
+<div id="app" style="width: 100%; height: 100%"></div>
+</body>
+</html>
client/demo/dist/index.js (vendored): 1904 changes
File diff suppressed because one or more lines are too long
client/demo/dist/index.js.LICENSE.txt (vendored): 31 changes
@@ -1,31 +0,0 @@
-/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
-
-/**
- * @license React
- * react-dom.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-/**
- * @license React
- * react.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-/**
- * @license React
- * scheduler.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
server/.vscode/settings.json (vendored): 1 change
@@ -8,6 +8,7 @@
         "editor.defaultFormatter": null, // do not use Prettier
         "editor.formatOnSave": true // auto-format on file save
     },
+    "python.formatting.blackArgs": ["--line-length", "550"],
     "flake8.args": [
         "--ignore=E501,E402,E722,E741,E203,W503"
         // "--max-line-length=150",
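The added blackArgs entry raises black's line length to 550, which effectively disables line wrapping; that single setting accounts for nearly every VoiceChanger.py hunk below collapsing a wrapped call back onto one long line. A minimal sketch of the effect, assuming the black package is installed (the snippet is illustrative, not from the repo):

import black

# With an extreme line length, black joins wrapped calls back onto one line
# (a trailing "magic" comma would keep the call expanded, so none is used here).
src = "x = some_function(\n    argument_one,\n    argument_two\n)\n"
print(black.format_str(src, mode=black.Mode(line_length=550)))
# -> x = some_function(argument_one, argument_two)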
server/voice_changer/VoiceChanger.py
@@ -45,9 +45,7 @@ class VoiceChangerSettings:

     recordIO: int = 0  # 0:off, 1:on
     serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
-    serverAudioOutputDevices: list[ServerAudioDevice] = field(
-        default_factory=lambda: []
-    )
+    serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])

     enableServerAudio: int = 0  # 0:off, 1:on
     serverAudioStated: int = 0  # 0:off, 1:on
@@ -103,19 +101,14 @@ class VoiceChanger:

     emitTo = None

-    def audio_callback(
-        self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
-    ):
+    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
         try:
             indata = indata * self.settings.serverInputAudioGain
             with Timer("all_inference_time") as t:
                 unpackedData = librosa.to_mono(indata.T) * 32768.0
                 out_wav, times = self.on_request(unpackedData)
                 outputChunnels = outdata.shape[1]
-                outdata[:] = (
-                    np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
-                    / 32768.0
-                )
+                outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
                 outdata[:] = outdata * self.settings.serverOutputAudioGain
             all_inference_time = t.secs
             performance = [all_inference_time] + times
@@ -125,9 +118,7 @@ class VoiceChanger:
         except Exception as e:
             print("[Voice Changer] ex:", e)

-    def getServerAudioDevice(
-        self, audioDeviceList: list[ServerAudioDevice], index: int
-    ):
+    def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
         serverAudioDevice = [x for x in audioDeviceList if x.index == index]
         if len(serverAudioDevice) > 0:
             return serverAudioDevice[0]
@@ -142,11 +133,7 @@ class VoiceChanger:
         currentOutputDeviceId = -1
         currentInputChunkNum = -1
         while True:
-            if (
-                vc.settings.serverAudioStated == 0
-                or vc.settings.serverInputDeviceId == -1
-                or vc.voiceChanger is None
-            ):
+            if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
                 vc.settings.inputSampleRate = 48000
                 time.sleep(2)
             else:
@@ -160,12 +147,8 @@ class VoiceChanger:

             currentInputChannelNum = vc.settings.serverAudioInputDevices

-            serverInputAudioDevice = self.getServerAudioDevice(
-                vc.settings.serverAudioInputDevices, currentInputDeviceId
-            )
-            serverOutputAudioDevice = self.getServerAudioDevice(
-                vc.settings.serverAudioOutputDevices, currentOutputDeviceId
-            )
+            serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
+            serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
             print(serverInputAudioDevice, serverOutputAudioDevice)
             if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                 time.sleep(2)
@@ -180,9 +163,7 @@ class VoiceChanger:

                 # sample rate precheck(alsa cannot use 40000?)
                 try:
-                    currentModelSamplingRate = (
-                        self.voiceChanger.get_processing_sampling_rate()
-                    )
+                    currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
                 except Exception as e:
                     print("[Voice Changer] ex: get_processing_sampling_rate", e)
                     continue
@@ -197,17 +178,13 @@ class VoiceChanger:
                     pass
                 vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
                 vc.settings.inputSampleRate = currentModelSamplingRate
-                print(
-                    f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}"
-                )
+                print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
             except Exception as e:
                 print(
                     "[Voice Changer] ex: fallback to device default samplerate",
                     e,
                 )
-                vc.settings.serverInputAudioSampleRate = (
-                    serverInputAudioDevice.default_samplerate
-                )
+                vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
                 vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate

             # main loop
@@ -219,15 +196,7 @@ class VoiceChanger:
                 dtype="float32",
                 channels=[currentInputChannelNum, currentOutputChannelNum],
             ):
-                while (
-                    vc.settings.serverAudioStated == 1
-                    and currentInputDeviceId == vc.settings.serverInputDeviceId
-                    and currentOutputDeviceId
-                    == vc.settings.serverOutputDeviceId
-                    and currentModelSamplingRate
-                    == self.voiceChanger.get_processing_sampling_rate()
-                    and currentInputChunkNum == vc.settings.serverReadChunkSize
-                ):
+                while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
                     time.sleep(2)
                 print(
                     "[Voice Changer] server audio",
@@ -260,10 +229,7 @@ class VoiceChanger:
         self.params = params
         self.gpu_num = torch.cuda.device_count()
         self.prev_audio = np.zeros(4096)
-        self.mps_enabled: bool = (
-            getattr(torch.backends, "mps", None) is not None
-            and torch.backends.mps.is_available()
-        )
+        self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()

         audioinput, audiooutput = list_audio_device()
         self.settings.serverAudioInputDevices = audioinput
@@ -271,9 +237,7 @@ class VoiceChanger:

         thread = threading.Thread(target=self.serverLocal, args=(self,))
         thread.start()
-        print(
-            f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
-        )
+        print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")

     def switchModelType(self, modelType: ModelType):
         try:
@@ -295,10 +259,7 @@ class VoiceChanger:
             from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2

             self.voiceChanger = SoVitsSvc40v2(self.params)
-        elif (
-            self.modelType == "so-vits-svc-40"
-            or self.modelType == "so-vits-svc-40_c"
-        ):
+        elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c":
             from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40

             self.voiceChanger = SoVitsSvc40(self.params)
@@ -328,9 +289,7 @@ class VoiceChanger:
     def loadModel(self, props: LoadModelParams):
         try:
             if self.voiceChanger is None:
-                raise VoiceChangerIsNotSelectedException(
-                    "Voice Changer is not selected."
-                )
+                raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
             return self.voiceChanger.loadModel(props)
         except Exception as e:
             print(traceback.format_exc())
@@ -341,15 +300,6 @@ class VoiceChanger:
         data = asdict(self.settings)
         if self.voiceChanger is not None:
             data.update(self.voiceChanger.get_info())
-
-        devCount = torch.cuda.device_count()
-        gpus = []
-        for id in range(devCount):
-            name = torch.cuda.get_device_name(id)
-            memory = torch.cuda.get_device_properties(id).total_memory
-            gpu = {"id": id, "name": name, "memory": memory}
-            gpus.append(gpu)
-        data["gpus"] = gpus
         return data

     def get_performance(self):
@@ -367,9 +317,7 @@ class VoiceChanger:
         if key == "recordIO" and val == 1:
             if hasattr(self, "ioRecorder"):
                 self.ioRecorder.close()
-            self.ioRecorder = IORecorder(
-                STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
-            )
+            self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
         if key == "recordIO" and val == 0:
             if hasattr(self, "ioRecorder"):
                 self.ioRecorder.close()
@@ -390,12 +338,7 @@ class VoiceChanger:
         return self.get_info()

     def _generate_strength(self, crossfadeSize: int):
-        if (
-            self.crossfadeSize != crossfadeSize
-            or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
-            or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
-            or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
-        ):
+        if self.crossfadeSize != crossfadeSize or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
             self.crossfadeSize = crossfadeSize
             self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
             self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
@@ -424,9 +367,7 @@ class VoiceChanger:
             ]
         )

-        print(
-            f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
-        )
+        print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")

         # The size differs from the previous result, so clear the recorded buffers.
         if hasattr(self, "np_prev_audio1") is True:
@@ -435,19 +376,13 @@ class VoiceChanger:
             del self.sola_buffer

     # receivedData: tuple of short
-    def on_request(
-        self, receivedData: AudioInOut
-    ) -> tuple[AudioInOut, list[Union[int, float]]]:
+    def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
         return self.on_request_sola(receivedData)

-    def on_request_sola(
-        self, receivedData: AudioInOut
-    ) -> tuple[AudioInOut, list[Union[int, float]]]:
+    def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
         try:
             if self.voiceChanger is None:
-                raise VoiceChangerIsNotSelectedException(
-                    "Voice Changer is not selected."
-                )
+                raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")

             processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
             # preprocessing
@@ -470,9 +405,7 @@ class VoiceChanger:
                 crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
                 self._generate_strength(crossfade_frame)

-                data = self.voiceChanger.generate_input(
-                    newData, block_frame, crossfade_frame, sola_search_frame
-                )
+                data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
             preprocess_time = t.secs

             # conversion
@@ -482,9 +415,7 @@ class VoiceChanger:

                 if hasattr(self, "sola_buffer") is True:
                     np.set_printoptions(threshold=10000)
-                    audio_offset = -1 * (
-                        sola_search_frame + crossfade_frame + block_frame
-                    )
+                    audio_offset = -1 * (sola_search_frame + crossfade_frame + block_frame)
                     audio = audio[audio_offset:]

                     # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
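For context on the SOLA reference in the hunk above: the algorithm slides the previous output's tail over a short search window of the new audio, picks the offset with the highest normalized cross-correlation, and crossfades there so block boundaries do not click. A self-contained sketch with illustrative names (not the repo's implementation):

import numpy as np

def sola_offset(prev_tail: np.ndarray, new_audio: np.ndarray, search: int) -> int:
    # Score each candidate offset by correlation with the previous tail,
    # normalized by the candidate's energy so loud frames don't dominate.
    n = len(prev_tail)
    scores = [
        float(np.dot(prev_tail, new_audio[i:i + n]))
        / (float(np.linalg.norm(new_audio[i:i + n])) + 1e-8)
        for i in range(search)
    ]
    return int(np.argmax(scores))

rng = np.random.default_rng(0)
tail = rng.standard_normal(128)
audio = np.concatenate([rng.standard_normal(32), tail, rng.standard_normal(256)])
print(sola_offset(tail, audio, 64))  # finds the planted offset: 32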
@@ -512,10 +443,7 @@ class VoiceChanger:
                    print("[Voice Changer] warming up... generating sola buffer.")
                    result = np.zeros(4096).astype(np.int16)

-                if (
-                    hasattr(self, "sola_buffer") is True
-                    and sola_offset < sola_search_frame
-                ):
+                if hasattr(self, "sola_buffer") is True and sola_offset < sola_search_frame:
                    offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
                    end = -1 * (sola_search_frame - sola_offset)
                    sola_buf_org = audio[offset:end]
@@ -545,9 +473,7 @@ class VoiceChanger:
            else:
                outputData = result

-            print_convert_processing(
-                f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
-            )
+            print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")

            if receivedData.shape[0] != outputData.shape[0]:
                # print(
@@ -564,9 +490,7 @@ class VoiceChanger:

            postprocess_time = t.secs

-            print_convert_processing(
-                f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
-            )
+            print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
            perf = [preprocess_time, mainprocess_time, postprocess_time]
            return outputData, perf

@@ -586,9 +510,7 @@ class VoiceChanger:
            print("[Voice Changer] embedder:", e)
            return np.zeros(1).astype(np.int16), [0, 0, 0]
        except VoiceChangerIsNotSelectedException:
-            print(
-                "[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc."
-            )
+            print("[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc.")
            return np.zeros(1).astype(np.int16), [0, 0, 0]
        except DeviceCannotSupportHalfPrecisionException:
            # Fallback is handled in RVC.py, so just return dummy data here.
server/voice_changer/VoiceChangerManager.py
@@ -4,16 +4,47 @@ from const import ModelType
 from voice_changer.utils.LoadModelParams import LoadModelParams
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
+from dataclasses import dataclass, asdict
+import torch
+
+
+@dataclass()
+class GPUInfo:
+    id: int
+    name: str
+    memory: int
+
+
+@dataclass()
+class VoiceChangerManagerSettings:
+    dummy: int
+
+    # intData: list[str] = field(default_factory=lambda: ["slotIndex"])
+
+
 class VoiceChangerManager(object):
     _instance = None
-    voiceChanger: VoiceChanger = None
+
+    def __init__(self, params: VoiceChangerParams):
+        self.voiceChanger: VoiceChanger = None
+        self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings(dummy=0)
+        # collect static information
+        self.gpus: list[GPUInfo] = self._get_gpuInfos()
+
+    def _get_gpuInfos(self):
+        devCount = torch.cuda.device_count()
+        gpus = []
+        for id in range(devCount):
+            name = torch.cuda.get_device_name(id)
+            memory = torch.cuda.get_device_properties(id).total_memory
+            gpu = {"id": id, "name": name, "memory": memory}
+            gpus.append(gpu)
+        return gpus
+
     @classmethod
     def get_instance(cls, params: VoiceChangerParams):
         if cls._instance is None:
-            cls._instance = cls()
+            cls._instance = cls(params)
             cls._instance.voiceChanger = VoiceChanger(params)
         return cls._instance

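Note that _get_gpuInfos builds plain dicts even though the hunk declares a GPUInfo dataclass. An illustration (not part of the commit) of how the declared dataclass could produce the same JSON-ready payload:

from dataclasses import dataclass, asdict

@dataclass()
class GPUInfo:
    id: int
    name: str
    memory: int  # total memory in bytes

# asdict() yields the same {"id", "name", "memory"} shape the manager returns.
gpu = GPUInfo(id=0, name="Example GPU", memory=8 * 1024**3)
print(asdict(gpu))  # {'id': 0, 'name': 'Example GPU', 'memory': 8589934592}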
@@ -26,10 +57,15 @@ class VoiceChangerManager(object):
         return info

     def get_info(self):
+        data = asdict(self.settings)
+        data["gpus"] = self.gpus
+
+        data["status"] = "OK"
+
         if hasattr(self, "voiceChanger"):
             info = self.voiceChanger.get_info()
-            info["status"] = "OK"
-            return info
+            data.update(info)
+            return data
         else:
             return {"status": "ERROR", "msg": "no model loaded"}

@@ -42,11 +78,10 @@ class VoiceChangerManager(object):

     def update_settings(self, key: str, val: str | int | float):
         if hasattr(self, "voiceChanger"):
-            info = self.voiceChanger.update_settings(key, val)
-            info["status"] = "OK"
-            return info
+            self.voiceChanger.update_settings(key, val)
         else:
             return {"status": "ERROR", "msg": "no model loaded"}
+        return self.get_info()

     def changeVoice(self, receivedData: AudioInOut):
         if hasattr(self, "voiceChanger") is True:
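With these hunks applied, get_info() merges manager-level data (settings, the static gpus list, a default status) with the active model's info, and update_settings() now returns that merged payload via get_info(). A usage sketch; it assumes it runs inside the server package where voice_changer.VoiceChangerManager is importable, and that a VoiceChangerParams instance is constructed elsewhere (the commit does not show that part):

from voice_changer.VoiceChangerManager import VoiceChangerManager

def show_manager_info(params):  # params: a VoiceChangerParams built by the server
    manager = VoiceChangerManager.get_instance(params)  # singleton; now forwards params to __init__
    info = manager.get_info()
    print(info["status"])  # "OK" once a voice changer is loaded
    print(info["gpus"])    # static GPU list collected once in __init__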