WIP: refactoring

commit b3d7946592
parent 4ac4a225a7
Author: wataru
Date: 2023-04-28 14:12:19 +09:00


@@ -1,6 +1,11 @@
 import sys
 import os
-if sys.platform.startswith('darwin'):
+
+from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.VoiceChangerModel import AudioInOut
+from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
+
+if sys.platform.startswith("darwin"):
     baseDir = [x for x in sys.path if x.endswith("Contents/MacOS")]
     if len(baseDir) != 1:
         print("baseDir should be only one ", baseDir)
@@ -12,17 +17,16 @@ else:
 import io
 from dataclasses import dataclass, asdict, field
-from functools import reduce
 import numpy as np
 import torch
 import onnxruntime
 # onnxruntime.set_default_logger_severity(3)
-from const import HUBERT_ONNX_MODEL_PATH
 import pyworld as pw
-from models import SynthesizerTrn
+from models import SynthesizerTrn  # type:ignore
-import cluster
+import cluster  # type:ignore
 import utils
 from fairseq import checkpoint_utils
 import librosa
@@ -30,11 +34,16 @@ import librosa
 from Exceptions import NoModeLoadedException

-providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
+providers = [
+    "OpenVINOExecutionProvider",
+    "CUDAExecutionProvider",
+    "DmlExecutionProvider",
+    "CPUExecutionProvider",
+]

 @dataclass
-class SoVitsSvc40Settings():
+class SoVitsSvc40Settings:
     gpu: int = 0
     dstId: int = 0
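
The provider list above is ordered by preference: onnxruntime walks it left to right and binds the session to the first provider available in the installed build, so keeping "CPUExecutionProvider" last guarantees a working fallback. A minimal sketch (the model path is hypothetical):

    import onnxruntime

    sess = onnxruntime.InferenceSession(
        "model.onnx",  # hypothetical path
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    print(sess.get_providers())  # the providers actually bound, in order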
@@ -51,9 +60,7 @@ class SoVitsSvc40Settings():
     onnxModelFile: str = ""
     configFile: str = ""
-    speakers: dict[str, int] = field(
-        default_factory=lambda: {}
-    )
+    speakers: dict[str, int] = field(default_factory=lambda: {})

     # ↓ enumerate only the mutable fields
     intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize"]
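
The one-line speakers field keeps default_factory because dataclasses reject mutable defaults outright. A minimal sketch of the rule:

    from dataclasses import dataclass, field

    @dataclass
    class Example:
        # speakers: dict[str, int] = {}  # ValueError: mutable default not allowed
        speakers: dict[str, int] = field(default_factory=lambda: {})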
@@ -62,7 +69,9 @@ class SoVitsSvc40Settings():
 class SoVitsSvc40:
-    def __init__(self, params):
+    audio_buffer: AudioInOut | None = None
+
+    def __init__(self, params: VoiceChangerParams):
         self.settings = SoVitsSvc40Settings()
         self.net_g = None
         self.onnx_session = None
@@ -74,32 +83,30 @@ class SoVitsSvc40:
         print("so-vits-svc40 initialization:", params)

     # def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None):
-    def loadModel(self, props):
-        self.settings.configFile = props["files"]["configFilename"]
+    def loadModel(self, props: LoadModelParams):
+        self.settings.configFile = props.files.configFilename
         self.hps = utils.get_hparams_from_file(self.settings.configFile)
         self.settings.speakers = self.hps.spk
-        self.settings.pyTorchModelFile = props["files"]["pyTorchModelFilename"]
-        self.settings.onnxModelFile = props["files"]["onnxModelFilename"]
-        clusterTorchModel = props["files"]["clusterTorchModelFilename"]
+        self.settings.pyTorchModelFile = props.files.pyTorchModelFilename
+        self.settings.onnxModelFile = props.files.onnxModelFilename
+        clusterTorchModel = props.files.clusterTorchModelFilename

-        content_vec_path = self.params["content_vec_500"]
-        content_vec_onnx_path = self.params["content_vec_500_onnx"]
-        content_vec_onnx_on = self.params["content_vec_500_onnx_on"]
-        hubert_base_path = self.params["hubert_base"]
+        content_vec_path = self.params.content_vec_500
+        content_vec_onnx_path = self.params.content_vec_500_onnx
+        content_vec_onnx_on = self.params.content_vec_500_onnx_on
+        hubert_base_path = self.params.hubert_base

         # hubert model
         try:
-            if os.path.exists(content_vec_path) == False:
+            if os.path.exists(content_vec_path) is False:
                 content_vec_path = hubert_base_path
-            if content_vec_onnx_on == True:
+            if content_vec_onnx_on is True:
                 ort_options = onnxruntime.SessionOptions()
                 ort_options.intra_op_num_threads = 8
                 self.content_vec_onnx = onnxruntime.InferenceSession(
-                    content_vec_onnx_path,
-                    providers=providers
+                    content_vec_onnx_path, providers=providers
                 )
             else:
                 models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
@@ -114,7 +121,7 @@ class SoVitsSvc40:
         # cluster
         try:
-            if clusterTorchModel != None and os.path.exists(clusterTorchModel):
+            if clusterTorchModel is not None and os.path.exists(clusterTorchModel):
                 self.cluster_model = cluster.get_cluster_model(clusterTorchModel)
             else:
                 self.cluster_model = None
@@ -122,22 +129,22 @@ class SoVitsSvc40:
             print("EXCEPTION during loading cluster model ", e)

         # Create the PyTorch model
-        if self.settings.pyTorchModelFile != None:
-            self.net_g = SynthesizerTrn(
+        if self.settings.pyTorchModelFile is not None:
+            net_g = SynthesizerTrn(
                 self.hps.data.filter_length // 2 + 1,
                 self.hps.train.segment_size // self.hps.data.hop_length,
-                **self.hps.model
+                **self.hps.model,
             )
-            self.net_g.eval()
+            net_g.eval()
+            self.net_g = net_g
             utils.load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)

         # Create the ONNX model
-        if self.settings.onnxModelFile != None:
+        if self.settings.onnxModelFile is not None:
             ort_options = onnxruntime.SessionOptions()
             ort_options.intra_op_num_threads = 8
             self.onnx_session = onnxruntime.InferenceSession(
-                self.settings.onnxModelFile,
-                providers=providers
+                self.settings.onnxModelFile, providers=providers
             )
             # input_info = self.onnx_session.get_inputs()
             # for i in input_info:
@@ -147,30 +154,43 @@ class SoVitsSvc40:
             #     print("output", i)
         return self.get_info()

-    def update_settings(self, key: str, val: any):
-        if key == "onnxExecutionProvider" and self.onnx_session != None:
+    def update_settings(self, key: str, val: int | float | str):
+        if key == "onnxExecutionProvider" and self.onnx_session is not None:
             if val == "CUDAExecutionProvider":
                 if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
                     self.settings.gpu = 0
-                provider_options = [{'device_id': self.settings.gpu}]
-                self.onnx_session.set_providers(providers=[val], provider_options=provider_options)
+                provider_options = [{"device_id": self.settings.gpu}]
+                self.onnx_session.set_providers(
+                    providers=[val], provider_options=provider_options
+                )
                 if hasattr(self, "content_vec_onnx"):
-                    self.content_vec_onnx.set_providers(providers=[val], provider_options=provider_options)
+                    self.content_vec_onnx.set_providers(
+                        providers=[val], provider_options=provider_options
+                    )
             else:
                 self.onnx_session.set_providers(providers=[val])
                 if hasattr(self, "content_vec_onnx"):
                     self.content_vec_onnx.set_providers(providers=[val])
-        elif key == "onnxExecutionProvider" and self.onnx_session == None:
+        elif key == "onnxExecutionProvider" and self.onnx_session is None:
             print("Onnx is not enabled. Please load model.")
             return False
         elif key in self.settings.intData:
-            setattr(self.settings, key, int(val))
-            if key == "gpu" and val >= 0 and val < self.gpu_num and self.onnx_session != None:
+            val = int(val)
+            setattr(self.settings, key, val)
+            if (
+                key == "gpu"
+                and val >= 0
+                and val < self.gpu_num
+                and self.onnx_session is not None
+            ):
                 providers = self.onnx_session.get_providers()
                 print("Providers:", providers)
                 if "CUDAExecutionProvider" in providers:
-                    provider_options = [{'device_id': self.settings.gpu}]
-                    self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
+                    provider_options = [{"device_id": self.settings.gpu}]
+                    self.onnx_session.set_providers(
+                        providers=["CUDAExecutionProvider"],
+                        provider_options=provider_options,
+                    )
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
         elif key in self.settings.strData:
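
update_settings above switches providers on a live session rather than rebuilding it. A minimal sketch, assuming session is an existing onnxruntime.InferenceSession: set_providers rebinds the session, and each entry in provider_options pairs positionally with the provider at the same index.

    session.set_providers(
        providers=["CUDAExecutionProvider"],
        provider_options=[{"device_id": 1}],  # e.g. move inference to GPU 1
    )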
@@ -183,10 +203,12 @@ class SoVitsSvc40:
     def get_info(self):
         data = asdict(self.settings)
-        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session != None else []
+        data["onnxExecutionProviders"] = (
+            self.onnx_session.get_providers() if self.onnx_session is not None else []
+        )
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
-            if data[f] != None and os.path.exists(data[f]):
+            if data[f] is not None and os.path.exists(data[f]):
                 data[f] = os.path.basename(data[f])
             else:
                 data[f] = ""
@@ -194,22 +216,30 @@ class SoVitsSvc40:
         return data

     def get_processing_sampling_rate(self):
-        if hasattr(self, "hps") == False:
+        if hasattr(self, "hps") is False:
             raise NoModeLoadedException("config")
         return self.hps.data.sampling_rate

     def get_unit_f0(self, audio_buffer, tran):
         wav_44k = audio_buffer
-        # f0 = utils.compute_f0_parselmouth(wav, sampling_rate=self.target_sample, hop_length=self.hop_size)
-        # f0 = utils.compute_f0_dio(wav_44k, sampling_rate=self.hps.data.sampling_rate, hop_length=self.hps.data.hop_length)
         if self.settings.f0Detector == "dio":
-            f0 = compute_f0_dio(wav_44k, sampling_rate=self.hps.data.sampling_rate, hop_length=self.hps.data.hop_length)
+            f0 = compute_f0_dio(
+                wav_44k,
+                sampling_rate=self.hps.data.sampling_rate,
+                hop_length=self.hps.data.hop_length,
+            )
         else:
-            f0 = compute_f0_harvest(wav_44k, sampling_rate=self.hps.data.sampling_rate, hop_length=self.hps.data.hop_length)
+            f0 = compute_f0_harvest(
+                wav_44k,
+                sampling_rate=self.hps.data.sampling_rate,
+                hop_length=self.hps.data.hop_length,
+            )
         if wav_44k.shape[0] % self.hps.data.hop_length != 0:
-            print(f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}")
+            print(
+                f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}"
+            )
         f0, uv = utils.interpolate_f0(f0)
         f0 = torch.FloatTensor(f0)
@@ -218,11 +248,14 @@ class SoVitsSvc40:
         f0 = f0.unsqueeze(0)
         uv = uv.unsqueeze(0)

-        # wav16k = librosa.resample(audio_buffer, orig_sr=24000, target_sr=16000)
-        wav16k_numpy = librosa.resample(audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000)
+        wav16k_numpy = librosa.resample(
+            audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000
+        )
         wav16k_tensor = torch.from_numpy(wav16k_numpy)

-        if (self.settings.gpu < 0 or self.gpu_num == 0) or self.settings.framework == "ONNX":
+        if (
+            self.settings.gpu < 0 or self.gpu_num == 0
+        ) or self.settings.framework == "ONNX":
             dev = torch.device("cpu")
         else:
             dev = torch.device("cuda", index=self.settings.gpu)
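
The resampling step above exists because the HuBERT/ContentVec feature extractors expect 16 kHz input while the model runs at the config's sampling rate (typically 44.1 kHz here). A self-contained sketch with a hypothetical buffer:

    import numpy as np
    import librosa

    buf_44k = np.zeros(44100, dtype=np.float32)  # one second at 44.1 kHz
    buf_16k = librosa.resample(buf_44k, orig_sr=44100, target_sr=16000)
    print(buf_16k.shape)  # (16000,)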
@@ -232,54 +265,87 @@ class SoVitsSvc40:
                 ["units"],
                 {
                     "audio": wav16k_numpy.reshape(1, -1),
-                })
+                },
+            )
             c = torch.from_numpy(np.array(c)).squeeze(0).transpose(1, 2)
             # print("onnx hubert:", self.content_vec_onnx.get_providers())
         else:
             if self.hps.model.ssl_dim == 768:
                 self.hubert_model = self.hubert_model.to(dev)
                 wav16k_tensor = wav16k_tensor.to(dev)
-                c = get_hubert_content_layer9(self.hubert_model, wav_16k_tensor=wav16k_tensor)
+                c = get_hubert_content_layer9(
+                    self.hubert_model, wav_16k_tensor=wav16k_tensor
+                )
             else:
                 self.hubert_model = self.hubert_model.to(dev)
                 wav16k_tensor = wav16k_tensor.to(dev)
-                c = utils.get_hubert_content(self.hubert_model, wav_16k_tensor=wav16k_tensor)
+                c = utils.get_hubert_content(
+                    self.hubert_model, wav_16k_tensor=wav16k_tensor
+                )
         uv = uv.to(dev)
         f0 = f0.to(dev)
         c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1])

-        if self.settings.clusterInferRatio != 0 and hasattr(self, "cluster_model") and self.cluster_model != None:
-            speaker = [key for key, value in self.settings.speakers.items() if value == self.settings.dstId]
+        if (
+            self.settings.clusterInferRatio != 0
+            and hasattr(self, "cluster_model")
+            and self.cluster_model is not None
+        ):
+            speaker = [
+                key
+                for key, value in self.settings.speakers.items()
+                if value == self.settings.dstId
+            ]
             if len(speaker) != 1:
                 pass
                 # print("not only one speaker found.", speaker)
             else:
-                cluster_c = cluster.get_cluster_center_result(self.cluster_model, c.cpu().numpy().T, speaker[0]).T
+                cluster_c = cluster.get_cluster_center_result(
+                    self.cluster_model, c.cpu().numpy().T, speaker[0]
+                ).T
                 cluster_c = torch.FloatTensor(cluster_c).to(dev)
                 c = c.to(dev)
-                c = self.settings.clusterInferRatio * cluster_c + (1 - self.settings.clusterInferRatio) * c
+                c = (
+                    self.settings.clusterInferRatio * cluster_c
+                    + (1 - self.settings.clusterInferRatio) * c
+                )

         c = c.unsqueeze(0)
         return c, f0, uv

-    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
+    def generate_input(
+        self,
+        newData: AudioInOut,
+        inputSize: int,
+        crossfadeSize: int,
+        solaSearchFrame: int = 0,
+    ):
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value

-        if hasattr(self, "audio_buffer"):
-            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
+        if self.audio_buffer is not None:
+            self.audio_buffer = np.concatenate(
+                [self.audio_buffer, newData], 0
+            )  # concatenate with the past data
         else:
             self.audio_buffer = newData

-        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
+        convertSize = (
+            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
+        )

         if convertSize % self.hps.data.hop_length != 0:  # pad, because the model output is truncated at its hop size
-            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
+            convertSize = convertSize + (
+                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
+            )

-        self.audio_buffer = self.audio_buffer[-1 * convertSize:]  # extract only the segment to convert
+        convertOffset = -1 * convertSize
+        self.audio_buffer = self.audio_buffer[convertOffset:]  # extract only the segment to convert

-        crop = self.audio_buffer[-1 * (inputSize + crossfadeSize):-1 * (crossfadeSize)]
+        cropOffset = -1 * (inputSize + crossfadeSize)
+        cropEnd = -1 * (crossfadeSize)
+        crop = self.audio_buffer[cropOffset:cropEnd]

         rms = np.sqrt(np.square(crop).mean(axis=0))
         vol = max(rms, self.prevVol * 0.0)
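
generate_input above rounds convertSize up to the next multiple of hop_length so the hop-aligned model output never truncates the tail of the buffer. The arithmetic in isolation (values hypothetical):

    hop_length = 512
    convertSize = 4000
    if convertSize % hop_length != 0:
        convertSize += hop_length - (convertSize % hop_length)
    assert convertSize == 4096  # smallest multiple of 512 >= 4000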
@@ -289,38 +355,46 @@ class SoVitsSvc40:
         return (c, f0, uv, convertSize, vol)

     def _onnx_inference(self, data):
-        if hasattr(self, "onnx_session") == False or self.onnx_session == None:
+        if hasattr(self, "onnx_session") is False or self.onnx_session is None:
             print("[Voice Changer] No onnx session.")
             raise NoModeLoadedException("ONNX")

         convertSize = data[3]
         vol = data[4]
-        data = (data[0], data[1], data[2],)
+        data = (
+            data[0],
+            data[1],
+            data[2],
+        )

         if vol < self.settings.silentThreshold:
             return np.zeros(convertSize).astype(np.int16)

         c, f0, uv = [x.numpy() for x in data]
         sid_target = torch.LongTensor([self.settings.dstId]).unsqueeze(0).numpy()
-        audio1 = self.onnx_session.run(
-            ["audio"],
-            {
-                "c": c.astype(np.float32),
-                "f0": f0.astype(np.float32),
-                "uv": uv.astype(np.float32),
-                "g": sid_target.astype(np.int64),
-                "noise_scale": np.array([self.settings.noiseScale]).astype(np.float32),
-                # "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
-            })[0][0, 0] * self.hps.data.max_wav_value
+        audio1 = (
+            self.onnx_session.run(
+                ["audio"],
+                {
+                    "c": c.astype(np.float32),
+                    "f0": f0.astype(np.float32),
+                    "uv": uv.astype(np.float32),
+                    "g": sid_target.astype(np.int64),
+                    "noise_scale": np.array([self.settings.noiseScale]).astype(
+                        np.float32
+                    ),
+                    # "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
+                },
+            )[0][0, 0]
+            * self.hps.data.max_wav_value
+        )
         audio1 = audio1 * vol

         result = audio1
         return result

     def _pyTorch_inference(self, data):
-        if hasattr(self, "net_g") == False or self.net_g == None:
+        if hasattr(self, "net_g") is False or self.net_g is None:
             print("[Voice Changer] No pyTorch session.")
             raise NoModeLoadedException("pytorch")
@@ -331,19 +405,29 @@ class SoVitsSvc40:
         convertSize = data[3]
         vol = data[4]
-        data = (data[0], data[1], data[2],)
+        data = (
+            data[0],
+            data[1],
+            data[2],
+        )

         if vol < self.settings.silentThreshold:
             return np.zeros(convertSize).astype(np.int16)

         with torch.no_grad():
-            c, f0, uv = [x.to(dev)for x in data]
+            c, f0, uv = [x.to(dev) for x in data]
             sid_target = torch.LongTensor([self.settings.dstId]).to(dev).unsqueeze(0)
             self.net_g.to(dev)
             # audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=True, noice_scale=0.1)[0][0, 0].data.float()
             predict_f0_flag = True if self.settings.predictF0 == 1 else False
-            audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=predict_f0_flag,
-                                      noice_scale=self.settings.noiseScale)
+            audio1 = self.net_g.infer(
+                c,
+                f0=f0,
+                g=sid_target,
+                uv=uv,
+                predict_f0=predict_f0_flag,
+                noice_scale=self.settings.noiseScale,
+            )
             audio1 = audio1[0][0].data.float()
             # audio1 = self.net_g.infer(c, f0=f0, g=sid_target, uv=uv, predict_f0=predict_f0_flag,
             #                           noice_scale=self.settings.noiceScale)[0][0, 0].data.float()
@@ -368,7 +452,7 @@ class SoVitsSvc40:
         del self.net_g
         del self.onnx_session

         remove_path = os.path.join("so-vits-svc-40")
-        sys.path = [x for x in sys.path if x.endswith(remove_path) == False]
+        sys.path = [x for x in sys.path if x.endswith(remove_path) is False]

         for key in list(sys.modules):
             val = sys.modules.get(key)
@@ -377,14 +461,18 @@ class SoVitsSvc40:
                 if file_path.find("so-vits-svc-40" + os.path.sep) >= 0:
                     print("remove", key, file_path)
                     sys.modules.pop(key)
-            except Exception as e:
+            except Exception:  # type:ignore
                 pass


 def resize_f0(x, target_len):
     source = np.array(x)
     source[source < 0.001] = np.nan
-    target = np.interp(np.arange(0, len(source) * target_len, len(source)) / target_len, np.arange(0, len(source)), source)
+    target = np.interp(
+        np.arange(0, len(source) * target_len, len(source)) / target_len,
+        np.arange(0, len(source)),
+        source,
+    )
     res = np.nan_to_num(target)
     return res
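
resize_f0 stretches an f0 track to a new frame count with np.interp, after masking near-zero (unvoiced) frames as NaN so they do not drag interpolated pitch toward zero; np.nan_to_num maps them back afterwards. A small sketch of the resampling core (frame counts hypothetical):

    import numpy as np

    f0 = np.array([100.0, 105.0, 110.0, 120.0])
    target_len = 8
    stretched = np.interp(
        np.arange(0, len(f0) * target_len, len(f0)) / target_len,  # 8 query points
        np.arange(0, len(f0)),
        f0,
    )
    print(stretched.shape)  # (8,)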
@@ -407,7 +495,13 @@ def compute_f0_dio(wav_numpy, p_len=None, sampling_rate=44100, hop_length=512):
 def compute_f0_harvest(wav_numpy, p_len=None, sampling_rate=44100, hop_length=512):
     if p_len is None:
         p_len = wav_numpy.shape[0] // hop_length
-    f0, t = pw.harvest(wav_numpy.astype(np.double), fs=sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
+    f0, t = pw.harvest(
+        wav_numpy.astype(np.double),
+        fs=sampling_rate,
+        frame_period=5.5,
+        f0_floor=71.0,
+        f0_ceil=1000.0,
+    )
     for index, pitch in enumerate(f0):
         f0[index] = round(pitch, 1)
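
For reference, pw.harvest returns both the f0 contour and its time axis; frame_period is in milliseconds, and f0_floor/f0_ceil bound the pitch search range. A minimal sketch on a hypothetical buffer:

    import numpy as np
    import pyworld as pw

    wav = np.zeros(44100, dtype=np.double)  # one second of silence
    f0, t = pw.harvest(wav, fs=44100, frame_period=5.5)
    print(f0.shape, t.shape)  # equal lengths: one frame every 5.5 ms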