move the GPU info getter to the manager

wataru 2023-06-16 02:50:05 +09:00
parent bdad75699e
commit d7e541ffa7
6 changed files with 1982 additions and 146 deletions
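
In short: VoiceChanger.get_info() used to enumerate CUDA devices with torch.cuda on every call; that loop now runs once, in the new VoiceChangerManager.__init__, and the cached result is merged into every status payload. The moved helper, as it appears in the VoiceChangerManager.py diff below (comments added here):

    import torch

    def _get_gpuInfos(self):
        # Enumerate CUDA devices once; the manager caches the result in
        # self.gpus and merges it into each get_info() response.
        devCount = torch.cuda.device_count()
        gpus = []
        for id in range(devCount):
            name = torch.cuda.get_device_name(id)
            memory = torch.cuda.get_device_properties(id).total_memory
            gpus.append({"id": id, "name": name, "memory": memory})
        return gpus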

View File

@@ -1 +1,10 @@
-<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
+<!DOCTYPE html>
+<html style="width: 100%; height: 100%; overflow: hidden">
+    <head>
+        <meta charset="utf-8" />
+        <title>Voice Changer Client Demo</title>
+        <script defer src="index.js"></script></head>
+    <body style="width: 100%; height: 100%; margin: 0px">
+        <div id="app" style="width: 100%; height: 100%"></div>
+    </body>
+</html>

File diff suppressed because one or more lines are too long

View File

@@ -1,31 +0,0 @@
-/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
-
-/**
- * @license React
- * react-dom.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-/**
- * @license React
- * react.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-/**
- * @license React
- * scheduler.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */

View File

@@ -8,6 +8,7 @@
         "editor.defaultFormatter": null, // Prettier 使
         "editor.formatOnSave": true //
     },
+    "python.formatting.blackArgs": ["--line-length", "550"],
     "flake8.args": [
         "--ignore=E501,E402,E722,E741,E203,W503"
         // "--max-line-length=150",

View File

@@ -45,9 +45,7 @@ class VoiceChangerSettings:
     recordIO: int = 0  # 0:off, 1:on
     serverAudioInputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
-    serverAudioOutputDevices: list[ServerAudioDevice] = field(
-        default_factory=lambda: []
-    )
+    serverAudioOutputDevices: list[ServerAudioDevice] = field(default_factory=lambda: [])
 
     enableServerAudio: int = 0  # 0:off, 1:on
     serverAudioStated: int = 0  # 0:off, 1:on
@@ -103,19 +101,14 @@ class VoiceChanger:
     emitTo = None
 
-    def audio_callback(
-        self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
-    ):
+    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
         try:
             indata = indata * self.settings.serverInputAudioGain
             with Timer("all_inference_time") as t:
                 unpackedData = librosa.to_mono(indata.T) * 32768.0
                 out_wav, times = self.on_request(unpackedData)
                 outputChunnels = outdata.shape[1]
-                outdata[:] = (
-                    np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels)
-                    / 32768.0
-                )
+                outdata[:] = np.repeat(out_wav, outputChunnels).reshape(-1, outputChunnels) / 32768.0
                 outdata[:] = outdata * self.settings.serverOutputAudioGain
             all_inference_time = t.secs
             performance = [all_inference_time] + times
@@ -125,9 +118,7 @@ class VoiceChanger:
         except Exception as e:
             print("[Voice Changer] ex:", e)
 
-    def getServerAudioDevice(
-        self, audioDeviceList: list[ServerAudioDevice], index: int
-    ):
+    def getServerAudioDevice(self, audioDeviceList: list[ServerAudioDevice], index: int):
         serverAudioDevice = [x for x in audioDeviceList if x.index == index]
         if len(serverAudioDevice) > 0:
             return serverAudioDevice[0]
@@ -142,11 +133,7 @@ class VoiceChanger:
         currentOutputDeviceId = -1
         currentInputChunkNum = -1
         while True:
-            if (
-                vc.settings.serverAudioStated == 0
-                or vc.settings.serverInputDeviceId == -1
-                or vc.voiceChanger is None
-            ):
+            if vc.settings.serverAudioStated == 0 or vc.settings.serverInputDeviceId == -1 or vc.voiceChanger is None:
                 vc.settings.inputSampleRate = 48000
                 time.sleep(2)
             else:
@@ -160,12 +147,8 @@ class VoiceChanger:
                 currentInputChannelNum = vc.settings.serverAudioInputDevices
 
-                serverInputAudioDevice = self.getServerAudioDevice(
-                    vc.settings.serverAudioInputDevices, currentInputDeviceId
-                )
-                serverOutputAudioDevice = self.getServerAudioDevice(
-                    vc.settings.serverAudioOutputDevices, currentOutputDeviceId
-                )
+                serverInputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioInputDevices, currentInputDeviceId)
+                serverOutputAudioDevice = self.getServerAudioDevice(vc.settings.serverAudioOutputDevices, currentOutputDeviceId)
                 print(serverInputAudioDevice, serverOutputAudioDevice)
                 if serverInputAudioDevice is None or serverOutputAudioDevice is None:
                     time.sleep(2)
@@ -180,9 +163,7 @@ class VoiceChanger:
                 # sample rate precheck(alsa cannot use 40000?)
                 try:
-                    currentModelSamplingRate = (
-                        self.voiceChanger.get_processing_sampling_rate()
-                    )
+                    currentModelSamplingRate = self.voiceChanger.get_processing_sampling_rate()
                 except Exception as e:
                     print("[Voice Changer] ex: get_processing_sampling_rate", e)
                     continue
@@ -197,17 +178,13 @@ class VoiceChanger:
                         pass
                     vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
                     vc.settings.inputSampleRate = currentModelSamplingRate
-                    print(
-                        f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}"
-                    )
+                    print(f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}")
                 except Exception as e:
                     print(
                         "[Voice Changer] ex: fallback to device default samplerate",
                         e,
                     )
-                    vc.settings.serverInputAudioSampleRate = (
-                        serverInputAudioDevice.default_samplerate
-                    )
+                    vc.settings.serverInputAudioSampleRate = serverInputAudioDevice.default_samplerate
                     vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
 
                 # main loop
@@ -219,15 +196,7 @@ class VoiceChanger:
                     dtype="float32",
                     channels=[currentInputChannelNum, currentOutputChannelNum],
                 ):
-                    while (
-                        vc.settings.serverAudioStated == 1
-                        and currentInputDeviceId == vc.settings.serverInputDeviceId
-                        and currentOutputDeviceId
-                        == vc.settings.serverOutputDeviceId
-                        and currentModelSamplingRate
-                        == self.voiceChanger.get_processing_sampling_rate()
-                        and currentInputChunkNum == vc.settings.serverReadChunkSize
-                    ):
+                    while vc.settings.serverAudioStated == 1 and currentInputDeviceId == vc.settings.serverInputDeviceId and currentOutputDeviceId == vc.settings.serverOutputDeviceId and currentModelSamplingRate == self.voiceChanger.get_processing_sampling_rate() and currentInputChunkNum == vc.settings.serverReadChunkSize:
                         time.sleep(2)
                         print(
                             "[Voice Changer] server audio",
@@ -260,10 +229,7 @@ class VoiceChanger:
         self.params = params
         self.gpu_num = torch.cuda.device_count()
         self.prev_audio = np.zeros(4096)
-        self.mps_enabled: bool = (
-            getattr(torch.backends, "mps", None) is not None
-            and torch.backends.mps.is_available()
-        )
+        self.mps_enabled: bool = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
 
         audioinput, audiooutput = list_audio_device()
         self.settings.serverAudioInputDevices = audioinput
@@ -271,9 +237,7 @@ class VoiceChanger:
         thread = threading.Thread(target=self.serverLocal, args=(self,))
         thread.start()
-        print(
-            f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})"
-        )
+        print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
 
     def switchModelType(self, modelType: ModelType):
         try:
@@ -295,10 +259,7 @@ class VoiceChanger:
             from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
 
             self.voiceChanger = SoVitsSvc40v2(self.params)
-        elif (
-            self.modelType == "so-vits-svc-40"
-            or self.modelType == "so-vits-svc-40_c"
-        ):
+        elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c":
             from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
 
             self.voiceChanger = SoVitsSvc40(self.params)
@@ -328,9 +289,7 @@ class VoiceChanger:
     def loadModel(self, props: LoadModelParams):
         try:
             if self.voiceChanger is None:
-                raise VoiceChangerIsNotSelectedException(
-                    "Voice Changer is not selected."
-                )
+                raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
             return self.voiceChanger.loadModel(props)
         except Exception as e:
             print(traceback.format_exc())
@@ -341,15 +300,6 @@ class VoiceChanger:
         data = asdict(self.settings)
         if self.voiceChanger is not None:
             data.update(self.voiceChanger.get_info())
-
-        devCount = torch.cuda.device_count()
-        gpus = []
-        for id in range(devCount):
-            name = torch.cuda.get_device_name(id)
-            memory = torch.cuda.get_device_properties(id).total_memory
-            gpu = {"id": id, "name": name, "memory": memory}
-            gpus.append(gpu)
-        data["gpus"] = gpus
         return data
 
     def get_performance(self):
@@ -367,9 +317,7 @@ class VoiceChanger:
             if key == "recordIO" and val == 1:
                 if hasattr(self, "ioRecorder"):
                     self.ioRecorder.close()
-                self.ioRecorder = IORecorder(
-                    STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate
-                )
+                self.ioRecorder = IORecorder(STREAM_INPUT_FILE, STREAM_OUTPUT_FILE, self.settings.inputSampleRate)
             if key == "recordIO" and val == 0:
                 if hasattr(self, "ioRecorder"):
                     self.ioRecorder.close()
@@ -390,12 +338,7 @@ class VoiceChanger:
         return self.get_info()
 
     def _generate_strength(self, crossfadeSize: int):
-        if (
-            self.crossfadeSize != crossfadeSize
-            or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate
-            or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate
-            or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize
-        ):
+        if self.crossfadeSize != crossfadeSize or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
             self.crossfadeSize = crossfadeSize
             self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
             self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
@@ -424,9 +367,7 @@ class VoiceChanger:
                 ]
             )
 
-            print(
-                f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}"
-            )
+            print(f"Generated Strengths: for prev:{self.np_prev_strength.shape}, for cur:{self.np_cur_strength.shape}")
 
             # The size differs from the previous result, so clear the stored record.
             if hasattr(self, "np_prev_audio1") is True:
@@ -435,19 +376,13 @@ class VoiceChanger:
                 del self.sola_buffer
 
     # receivedData: tuple of short
-    def on_request(
-        self, receivedData: AudioInOut
-    ) -> tuple[AudioInOut, list[Union[int, float]]]:
+    def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
         return self.on_request_sola(receivedData)
 
-    def on_request_sola(
-        self, receivedData: AudioInOut
-    ) -> tuple[AudioInOut, list[Union[int, float]]]:
+    def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
         try:
             if self.voiceChanger is None:
-                raise VoiceChangerIsNotSelectedException(
-                    "Voice Changer is not selected."
-                )
+                raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
 
             processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
             # pre-processing
@@ -470,9 +405,7 @@ class VoiceChanger:
                 crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
                 self._generate_strength(crossfade_frame)
-                data = self.voiceChanger.generate_input(
-                    newData, block_frame, crossfade_frame, sola_search_frame
-                )
+                data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
             preprocess_time = t.secs
 
             # conversion
@@ -482,9 +415,7 @@ class VoiceChanger:
                 if hasattr(self, "sola_buffer") is True:
                     np.set_printoptions(threshold=10000)
-                    audio_offset = -1 * (
-                        sola_search_frame + crossfade_frame + block_frame
-                    )
+                    audio_offset = -1 * (sola_search_frame + crossfade_frame + block_frame)
                     audio = audio[audio_offset:]
 
                     # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
@@ -512,10 +443,7 @@ class VoiceChanger:
                     print("[Voice Changer] warming up... generating sola buffer.")
                     result = np.zeros(4096).astype(np.int16)
 
-                if (
-                    hasattr(self, "sola_buffer") is True
-                    and sola_offset < sola_search_frame
-                ):
+                if hasattr(self, "sola_buffer") is True and sola_offset < sola_search_frame:
                     offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
                     end = -1 * (sola_search_frame - sola_offset)
                     sola_buf_org = audio[offset:end]
@@ -545,9 +473,7 @@ class VoiceChanger:
             else:
                 outputData = result
 
-            print_convert_processing(
-                f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz"
-            )
+            print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
 
             if receivedData.shape[0] != outputData.shape[0]:
                 # print(
@@ -564,9 +490,7 @@ class VoiceChanger:
             postprocess_time = t.secs
 
-            print_convert_processing(
-                f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}"
-            )
+            print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
             perf = [preprocess_time, mainprocess_time, postprocess_time]
             return outputData, perf
@@ -586,9 +510,7 @@ class VoiceChanger:
             print("[Voice Changer] embedder:", e)
             return np.zeros(1).astype(np.int16), [0, 0, 0]
         except VoiceChangerIsNotSelectedException:
-            print(
-                "[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc."
-            )
+            print("[Voice Changer] Voice Changer is not selected. Wait a bit and if there is no improvement, please re-select vc.")
            return np.zeros(1).astype(np.int16), [0, 0, 0]
         except DeviceCannotSupportHalfPrecisionException:
             # RVC.py handles the fallback, so this just returns dummy data.
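
For context on the sola_buffer / sola_offset code reflowed above: SOLA (synchronous overlap-add) aligns each new chunk with the buffered tail of the previous one by searching a small window for the offset with the strongest correlation, then crossfades there using the np_prev_strength / np_cur_strength envelopes from _generate_strength. A minimal NumPy sketch of the offset search, simplified from the DDSP-SVC/RVC approach referenced in the code (find_sola_offset is a hypothetical helper, not part of this diff):

    import numpy as np

    def find_sola_offset(audio: np.ndarray, sola_buffer: np.ndarray, sola_search_frame: int) -> int:
        # Slide the previous tail over the head of the new chunk and keep the
        # offset with the highest normalized correlation. Assumes
        # len(audio) >= sola_search_frame + len(sola_buffer).
        frame = len(sola_buffer)
        best_offset, best_corr = 0, -np.inf
        for offset in range(sola_search_frame):
            seg = audio[offset:offset + frame]
            corr = np.dot(seg, sola_buffer) / (np.sqrt(np.dot(seg, seg)) + 1e-8)
            if corr > best_corr:
                best_offset, best_corr = offset, corr
        return best_offset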

View File

@@ -4,16 +4,47 @@ from const import ModelType
 from voice_changer.utils.LoadModelParams import LoadModelParams
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
+from dataclasses import dataclass, asdict
+import torch
+
+
+@dataclass()
+class GPUInfo:
+    id: int
+    name: str
+    memory: int
+
+
+@dataclass()
+class VoiceChangerManagerSettings:
+    dummy: int
+    # intData: list[str] = field(default_factory=lambda: ["slotIndex"])
 
 
 class VoiceChangerManager(object):
     _instance = None
-    voiceChanger: VoiceChanger = None
+
+    def __init__(self, params: VoiceChangerParams):
+        self.voiceChanger: VoiceChanger = None
+        self.settings: VoiceChangerManagerSettings = VoiceChangerManagerSettings(dummy=0)
+        # collect static information
+        self.gpus: list[GPUInfo] = self._get_gpuInfos()
+
+    def _get_gpuInfos(self):
+        devCount = torch.cuda.device_count()
+        gpus = []
+        for id in range(devCount):
+            name = torch.cuda.get_device_name(id)
+            memory = torch.cuda.get_device_properties(id).total_memory
+            gpu = {"id": id, "name": name, "memory": memory}
+            gpus.append(gpu)
+        return gpus
 
     @classmethod
     def get_instance(cls, params: VoiceChangerParams):
         if cls._instance is None:
-            cls._instance = cls()
+            cls._instance = cls(params)
             cls._instance.voiceChanger = VoiceChanger(params)
         return cls._instance
@@ -26,10 +57,15 @@ class VoiceChangerManager(object):
         return info
 
     def get_info(self):
+        data = asdict(self.settings)
+        data["gpus"] = self.gpus
+        data["status"] = "OK"
+
         if hasattr(self, "voiceChanger"):
             info = self.voiceChanger.get_info()
-            info["status"] = "OK"
-            return info
+            data.update(info)
+            return data
         else:
             return {"status": "ERROR", "msg": "no model loaded"}
@@ -42,11 +78,10 @@ class VoiceChangerManager(object):
 
     def update_settings(self, key: str, val: str | int | float):
         if hasattr(self, "voiceChanger"):
-            info = self.voiceChanger.update_settings(key, val)
-            info["status"] = "OK"
-            return info
+            self.voiceChanger.update_settings(key, val)
         else:
             return {"status": "ERROR", "msg": "no model loaded"}
+        return self.get_info()
 
     def changeVoice(self, receivedData: AudioInOut):
         if hasattr(self, "voiceChanger") is True:
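
With the helper moved, GPU data is collected once at startup and every status payload carries it. A minimal sketch of the new call path (params stands in for a configured VoiceChangerParams):

    manager = VoiceChangerManager.get_instance(params)

    info = manager.get_info()
    # info["status"] == "OK"
    # info["gpus"]   == [{"id": 0, "name": "...", "memory": <bytes>}, ...]

    # update_settings() now returns the full merged payload via get_info()
    # instead of the voice changer's info dict alone.
    info = manager.update_settings("recordIO", 1)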