diff --git a/README.md b/README.md index 3c3626f6..adea76a2 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,18 @@ Windows 版と Mac 版を提供しています。 | | win | - | [黄琴海月](https://drive.google.com/uc?id=1fiymPcoYzwE1yxyIfC_FTPiFfGEC2jA8&export=download) | - | 823MB | | | win | - | [あみたろ](https://drive.google.com/uc?id=1Vt4WBEOAz0EhIWs3ZRFIcg7ELtSHnYfe&export=download) | - | 821MB | +| Version | OS | フレームワーク | link | サポート VC | サイズ | +| ----------- | ------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------ | ------------------------------------------------------------------- | ------ | +| v.1.5.1.15b | win | ONNX(cpu,cuda), PyTorch(cpu) | [通常](https://drive.google.com/uc?id=1nb5DxHQJqnYgzWFTBNxCDOx64__uQqyR&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, RVC | 773MB | +| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [通常](https://drive.google.com/uc?id=197U6ip9ypBSyxhIf3oGnkWfBP-M3Gc12&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 2794MB | +| | win | ONNX(cpu,DirectML), PyTorch(cpu) | [通常](https://drive.google.com/uc?id=18Q9CDBnjgTHwOeklVLWAVMFZI-kk9j3l&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, RVC | 488MB | +| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [通常](https://drive.google.com/uc?id=1rlGewdhvenv1Yn3WFOLcsWQeuo8ecIQ1&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 2665MB | +| | mac | ONNX(cpu), PyTorch(cpu) | [normal](https://drive.google.com/uc?id=1saAe8vycI4zv0LRbvNmFLfYt0utGRWyZ&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 615MB | + +| Version | OS | フレームワーク | link | サポート VC | サイズ | +| ----------- | ------------------------------------- | --------------------------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------ | +| v.1.5.1.15a | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [通常](https://drive.google.com/uc?id=1lCo4P3D3QVvrl-0DRh305e34d_YmsI10&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 2641MB | + \*1 つくよみちゃんはフリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。(利用規約など、詳細は文末) \*2 解凍や起動が遅い場合、ウィルス対策ソフトのチェックが走っている可能性があります。ファイルやフォルダを対象外にして実行してみてください。(自己責任です) diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py index 94ec39ac..06fc5921 100755 --- a/server/voice_changer/VoiceChanger.py +++ b/server/voice_changer/VoiceChanger.py @@ -11,13 +11,12 @@ import resampy from voice_changer.IORecorder import IORecorder # from voice_changer.IOAnalyzer import IOAnalyzer +from voice_changer.utils.Timer import Timer +from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut import time -AudioInput: TypeAlias = np.ndarray[Any, np.dtype[np.int16]] - - providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav") @@ -26,15 +25,6 @@ STREAM_ANALYZE_FILE_DIO = os.path.join(TMP_DIR, "analyze-dio.png") STREAM_ANALYZE_FILE_HARVEST = os.path.join(TMP_DIR, "analyze-harvest.png") -class VoiceChangerModel(Protocol): - loadModel: Callable[..., dict[str, Any]] - def get_processing_sampling_rate(self) -> int: ... - def get_info(self) -> dict[str, Any]: ... - def inference(self, data: tuple[Any, ...]) -> Any: ... - def generate_input(self, newData: AudioInput, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]: ... - def update_settings(self, key: str, val: Any) -> bool: ... - - @dataclass class VoiceChangerSettings(): inputSampleRate: int = 24000 # 48000 or 24000 @@ -234,7 +224,7 @@ class VoiceChanger(): delattr(self, "np_prev_audio1") # receivedData: tuple of short - def on_request(self, receivedData: AudioInput) -> tuple[AudioInput, list[Union[int, float]]]: + def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]: processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate() print_convert_processing(f"------------ Convert processing.... ------------") @@ -244,7 +234,7 @@ class VoiceChanger(): with Timer("pre-process") as t1: if self.settings.inputSampleRate != processing_sampling_rate: - newData = cast(AudioInput, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate)) + newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate)) else: newData = receivedData # print("t1::::", t1.secs) @@ -311,7 +301,7 @@ class VoiceChanger(): with Timer("post-process") as t: result = result.astype(np.int16) if self.settings.inputSampleRate != processing_sampling_rate: - outputData = cast(AudioInput, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16)) + outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16)) else: outputData = result # outputData = result @@ -345,7 +335,7 @@ def print_convert_processing(mess: str): print(mess) -def pad_array(arr: AudioInput, target_length: int): +def pad_array(arr: AudioInOut, target_length: int): current_length = arr.shape[0] if current_length >= target_length: return arr @@ -355,17 +345,3 @@ def pad_array(arr: AudioInput, target_length: int): pad_right = pad_width - pad_left padded_arr = np.pad(arr, (pad_left, pad_right), 'constant', constant_values=(0, 0)) return padded_arr - - -class Timer(object): - def __init__(self, title: str): - self.title = title - - def __enter__(self): - self.start = time.time() - return self - - def __exit__(self, *_): - self.end = time.time() - self.secs = self.end - self.start - self.msecs = self.secs * 1000 # millisecs diff --git a/server/voice_changer/utils/Timer.py b/server/voice_changer/utils/Timer.py new file mode 100644 index 00000000..43d7591f --- /dev/null +++ b/server/voice_changer/utils/Timer.py @@ -0,0 +1,15 @@ +import time + + +class Timer(object): + def __init__(self, title: str): + self.title = title + + def __enter__(self): + self.start = time.time() + return self + + def __exit__(self, *_): + self.end = time.time() + self.secs = self.end - self.start + self.msecs = self.secs * 1000 # millisecs diff --git a/server/voice_changer/utils/VoiceChangerModel.py b/server/voice_changer/utils/VoiceChangerModel.py new file mode 100644 index 00000000..cebe1a99 --- /dev/null +++ b/server/voice_changer/utils/VoiceChangerModel.py @@ -0,0 +1,14 @@ +from typing import Any, Callable, Protocol, TypeAlias +import numpy as np + + +AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]] + + +class VoiceChangerModel(Protocol): + loadModel: Callable[..., dict[str, Any]] + def get_processing_sampling_rate(self) -> int: ... + def get_info(self) -> dict[str, Any]: ... + def inference(self, data: tuple[Any, ...]) -> Any: ... + def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]: ... + def update_settings(self, key: str, val: Any) -> bool: ...