Merge pull request #162 from w-okada/v.1.5.2

V.1.5.2
This commit is contained in:
w-okada 2023-04-13 06:52:55 +09:00 committed by GitHub
commit 7324e4f1c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 30 deletions

View File

@ -83,6 +83,18 @@ Windows 版と Mac 版を提供しています。
| | <span style="color: blue;">win</span> | - | [黄琴海月](https://drive.google.com/uc?id=1fiymPcoYzwE1yxyIfC_FTPiFfGEC2jA8&export=download) | - | 823MB |
| | <span style="color: blue;">win</span> | - | [あみたろ](https://drive.google.com/uc?id=1Vt4WBEOAz0EhIWs3ZRFIcg7ELtSHnYfe&export=download) | - | 821MB |
| Version | OS | フレームワーク | link | サポート VC | サイズ |
| ----------- | ------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------ | ------------------------------------------------------------------- | ------ |
| v.1.5.1.15b | <span style="color: blue;">win</span> | ONNX(cpu,cuda), PyTorch(cpu) | [通常](https://drive.google.com/uc?id=1nb5DxHQJqnYgzWFTBNxCDOx64__uQqyR&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, RVC | 773MB |
| | <span style="color: blue;">win</span> | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [通常](https://drive.google.com/uc?id=197U6ip9ypBSyxhIf3oGnkWfBP-M3Gc12&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 2794MB |
| | <span style="color: blue;">win</span> | ONNX(cpu,DirectML), PyTorch(cpu) | [通常](https://drive.google.com/uc?id=18Q9CDBnjgTHwOeklVLWAVMFZI-kk9j3l&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, RVC | 488MB |
| | <span style="color: blue;">win</span> | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [通常](https://drive.google.com/uc?id=1rlGewdhvenv1Yn3WFOLcsWQeuo8ecIQ1&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 2665MB |
| | <span style="color: red;">mac</span> | ONNX(cpu), PyTorch(cpu) | [通常](https://drive.google.com/uc?id=1saAe8vycI4zv0LRbvNmFLfYt0utGRWyZ&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 615MB |
| Version | OS | フレームワーク | link | サポート VC | サイズ |
| ----------- | ------------------------------------- | --------------------------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------ |
| v.1.5.1.15a | <span style="color: blue;">win</span> | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [通常](https://drive.google.com/uc?id=1lCo4P3D3QVvrl-0DRh305e34d_YmsI10&export=download) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC | 2641MB |
\*1 つくよみちゃんはフリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。(利用規約など、詳細は文末)
\*2 解凍や起動が遅い場合、ウィルス対策ソフトのチェックが走っている可能性があります。ファイルやフォルダを対象外にして実行してみてください。(自己責任です)

View File

@ -11,13 +11,12 @@ import resampy
from voice_changer.IORecorder import IORecorder
# from voice_changer.IOAnalyzer import IOAnalyzer
from voice_changer.utils.Timer import Timer
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
import time
AudioInput: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
STREAM_INPUT_FILE = os.path.join(TMP_DIR, "in.wav")
@ -26,15 +25,6 @@ STREAM_ANALYZE_FILE_DIO = os.path.join(TMP_DIR, "analyze-dio.png")
STREAM_ANALYZE_FILE_HARVEST = os.path.join(TMP_DIR, "analyze-harvest.png")
class VoiceChangerModel(Protocol):
    """Structural interface that a voice-conversion backend must satisfy.

    Any object exposing these members can be used wherever a
    ``VoiceChangerModel`` is expected; no inheritance is required.
    """

    # Callable that loads a model and returns a dict of string-keyed values.
    loadModel: Callable[..., dict[str, Any]]

    def get_processing_sampling_rate(self) -> int:
        """Return the sampling rate, as an int, that the backend processes at."""

    def get_info(self) -> dict[str, Any]:
        """Return a dict of string-keyed information about the backend."""

    def inference(self, data: tuple[Any, ...]) -> Any:
        """Run the backend on ``data``.

        NOTE(review): ``data`` is presumably the tuple produced by
        ``generate_input`` -- confirm against the implementations.
        """

    def generate_input(self, newData: AudioInput, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]:
        """Build the backend-specific input tuple from newly received audio."""

    def update_settings(self, key: str, val: Any) -> bool:
        """Apply the setting ``key`` = ``val`` and return a bool result.

        NOTE(review): the bool presumably signals whether the key was
        handled -- confirm against the implementations.
        """
@dataclass
class VoiceChangerSettings():
inputSampleRate: int = 24000 # 48000 or 24000
@ -234,7 +224,7 @@ class VoiceChanger():
delattr(self, "np_prev_audio1")
# receivedData: tuple of short
def on_request(self, receivedData: AudioInput) -> tuple[AudioInput, list[Union[int, float]]]:
def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
print_convert_processing(f"------------ Convert processing.... ------------")
@ -244,7 +234,7 @@ class VoiceChanger():
with Timer("pre-process") as t1:
if self.settings.inputSampleRate != processing_sampling_rate:
newData = cast(AudioInput, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
newData = cast(AudioInOut, resampy.resample(receivedData, self.settings.inputSampleRate, processing_sampling_rate))
else:
newData = receivedData
# print("t1::::", t1.secs)
@ -311,7 +301,7 @@ class VoiceChanger():
with Timer("post-process") as t:
result = result.astype(np.int16)
if self.settings.inputSampleRate != processing_sampling_rate:
outputData = cast(AudioInput, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
outputData = cast(AudioInOut, resampy.resample(result, processing_sampling_rate, self.settings.inputSampleRate).astype(np.int16))
else:
outputData = result
# outputData = result
@ -345,7 +335,7 @@ def print_convert_processing(mess: str):
print(mess)
def pad_array(arr: AudioInput, target_length: int):
def pad_array(arr: AudioInOut, target_length: int):
current_length = arr.shape[0]
if current_length >= target_length:
return arr
@ -355,17 +345,3 @@ def pad_array(arr: AudioInput, target_length: int):
pad_right = pad_width - pad_left
padded_arr = np.pad(arr, (pad_left, pad_right), 'constant', constant_values=(0, 0))
return padded_arr
class Timer(object):
    """Context manager that measures how long its ``with`` body takes.

    Usage::

        with Timer("pre-process") as t:
            ...
        print(t.secs, t.msecs)

    Attributes:
        title: Label given at construction time.
        start: Wall-clock timestamp (``time.time()`` scale) taken on entry.
        end: ``start`` plus the measured duration, set on exit.
        secs: Elapsed seconds; ``0.0`` until the block has exited.
        msecs: Elapsed milliseconds; ``0.0`` until the block has exited.
    """

    def __init__(self, title: str):
        self.title = title
        # Defined up front so reading them before the block exits does not
        # raise AttributeError.
        self.secs = 0.0
        self.msecs = 0.0

    def __enter__(self):
        self.start = time.time()
        # The duration is taken from a monotonic clock: time.time() can jump
        # when the system clock is adjusted, which would skew the reading.
        self._perf_start = time.perf_counter()
        return self

    def __exit__(self, *_):
        elapsed = time.perf_counter() - self._perf_start
        # Derive `end` from `start` so that end - start stays consistent
        # with `secs`.
        self.end = self.start + elapsed
        self.secs = elapsed
        self.msecs = elapsed * 1000  # millisecs

View File

@ -0,0 +1,15 @@
import time
class Timer(object):
    """Context manager that measures how long its ``with`` body takes.

    Usage::

        with Timer("pre-process") as t:
            ...
        print(t.secs, t.msecs)

    Attributes:
        title: Label given at construction time.
        start: Wall-clock timestamp (``time.time()`` scale) taken on entry.
        end: ``start`` plus the measured duration, set on exit.
        secs: Elapsed seconds; ``0.0`` until the block has exited.
        msecs: Elapsed milliseconds; ``0.0`` until the block has exited.
    """

    def __init__(self, title: str):
        self.title = title
        # Defined up front so reading them before the block exits does not
        # raise AttributeError.
        self.secs = 0.0
        self.msecs = 0.0

    def __enter__(self):
        self.start = time.time()
        # The duration is taken from a monotonic clock: time.time() can jump
        # when the system clock is adjusted, which would skew the reading.
        self._perf_start = time.perf_counter()
        return self

    def __exit__(self, *_):
        elapsed = time.perf_counter() - self._perf_start
        # Derive `end` from `start` so that end - start stays consistent
        # with `secs`.
        self.end = self.start + elapsed
        self.secs = elapsed
        self.msecs = elapsed * 1000  # millisecs

View File

@ -0,0 +1,14 @@
from typing import Any, Callable, Protocol, TypeAlias
import numpy as np
AudioInOut: TypeAlias = np.ndarray[Any, np.dtype[np.int16]]
class VoiceChangerModel(Protocol):
loadModel: Callable[..., dict[str, Any]]
def get_processing_sampling_rate(self) -> int: ...
def get_info(self) -> dict[str, Any]: ...
def inference(self, data: tuple[Any, ...]) -> Any: ...
def generate_input(self, newData: AudioInOut, inputSize: int, crossfadeSize: int) -> tuple[Any, ...]: ...
def update_settings(self, key: str, val: Any) -> bool: ...