WIP: refactor, generalize generateStrength

wataru 2023-03-07 11:49:06 +09:00
parent 9a6b9851db
commit b9741fe577
3 changed files with 20 additions and 5 deletions
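
The diff below generalizes _generate_strength so that it receives only the input length (dataLength) rather than the raw audio buffer; callers now pass unpackedData.shape[0]. A minimal, self-contained sketch of that idea follows, assuming a hypothetical StrengthCache helper and plain linear ramps for the cross-fade curves (the project's actual curve math is not part of this diff):

import numpy as np

class StrengthCache:
    # Hypothetical helper (not project code): regenerates cross-fade
    # strength curves only when the input length changes.
    def __init__(self, overlap_size):
        self.overlap_size = overlap_size
        self.cached_length = -1
        self.fade_in = None
        self.fade_out = None

    def generate(self, data_length):
        # Only the length matters, so callers can pass buffer.shape[0]
        # instead of the buffer itself.
        if self.cached_length != data_length:
            self.cached_length = data_length
            ramp = np.linspace(0.0, 1.0, self.overlap_size)
            self.fade_in = ramp
            self.fade_out = 1.0 - ramp
        return self.fade_in, self.fade_out

cache = StrengthCache(overlap_size=1024)
unpackedData = np.zeros(4096, dtype=np.float32)
fade_in, fade_out = cache.generate(unpackedData.shape[0])  # pass the length, not the array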

.gitignore

@@ -13,8 +13,15 @@ server/out.wav
 server/G_*.pth
 server/train_config.json
+# Model folder for v.1.3.x testing
 server/v13
+server/hubert
+server/so-vits-svc
+# Model folder for sovits testing
+server/sovits
+server/test
 server/memo.md
 client/lib/dist

(second changed file, path not shown)

@@ -1,5 +1,4 @@
 import sys
-sys.path.append("MMVC_Client/python")
 from distutils.util import strtobool
 from datetime import datetime

(third changed file, path not shown)

@@ -1,3 +1,6 @@
+import sys
+sys.path.append("MMVC_Client/python")
 from const import ERROR_NO_ONNX_SESSION, TMP_DIR
 import torch
 import os
@@ -14,6 +17,8 @@ from models import SynthesizerTrn
 import pyworld as pw
 from voice_changer.client_modules import convert_continuos_f0, spectrogram_torch, TextAudioSpeakerCollate, get_hparams_from_file, load_checkpoint
+from voice_changer.MMVCv15 import MMVCv15
 import time
 providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
@@ -121,6 +126,8 @@ class VoiceChanger():
         self.currentCrossFadeEndRate = 0
         self.currentCrossFadeOverlapSize = 0
+        self.voiceChanger = MMVCv15()
         self.gpu_num = torch.cuda.device_count()
         self.text_norm = torch.LongTensor([0, 6, 0])
         self.audio_buffer = torch.zeros(1, 0)
@@ -285,14 +292,14 @@ class VoiceChanger():
         return self.get_info()

-    def _generate_strength(self, unpackedData):
-        if self.unpackedData_length != unpackedData.shape[0] or \
+    def _generate_strength(self, dataLength):
+        if self.unpackedData_length != dataLength or \
                 self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
                 self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
                 self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
-            self.unpackedData_length = unpackedData.shape[0]
+            self.unpackedData_length = dataLength
             self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
             self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
             self.currentCrossFadeOverlapSize = self.settings.crossFadeOverlapSize
@@ -502,7 +509,7 @@ class VoiceChanger():
                 convertSize = 8192
             if convertSize % 128 != 0:  # Compensate, because truncation occurs at the model's output hop size.
                 convertSize = convertSize + (128 - (convertSize % 128))
-            self._generate_strength(unpackedData)
+            self._generate_strength(unpackedData.shape[0])
             data = self._generate_input(unpackedData, convertSize)
         preprocess_time = t.secs
@@ -510,8 +517,10 @@ class VoiceChanger():
            try:
                if self.settings.framework == "ONNX":
                    result = self._onnx_inference(data, unpackedData.shape[0])
+                   # result = self.voiceChanger._onnx_inference(data, unpackedData.shape[0])
                else:
                    result = self._pyTorch_inference(data, unpackedData.shape[0])
+                   # result = self.voiceChanger._pyTorch_inference(data, unpackedData.shape[0])
            except Exception as e:
                print("VC PROCESSING!!!! EXCEPTION!!!", e)