This commit is contained in:
wataru 2023-04-28 17:18:33 +09:00
parent 50d1977d50
commit 569d8d2c0c
2 changed files with 17 additions and 35 deletions

View File

@@ -191,8 +191,8 @@ class DDSP_SVC:
setattr(self.settings, key, str(val))
if key == "f0Detector":
print("f0Detector update", val)
if val == "dio":
val = "parselmouth"
# if val == "dio":
# val = "parselmouth"
if hasattr(self, "sampling_rate") is False:
self.sampling_rate = 44100

View File

@@ -1,5 +1,6 @@
import numpy as np
import parselmouth
# import parselmouth
import torch
import torch.nn.functional as F
from config import x_query, x_center, x_max # type:ignore
@@ -27,28 +28,13 @@ class VC(object):
silence_front_offset = int(np.round(real_silence_front * self.sr))
audio = audio[silence_front_offset:]
time_step = self.window / self.sr * 1000
# time_step = self.window / self.sr * 1000
f0_min = 50
f0_max = 1100
f0_mel_min = 1127 * np.log(1 + f0_min / 700)
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
if f0_method == "pm":
f0 = (
parselmouth.Sound(audio, self.sr)
.to_pitch_ac(
time_step=time_step / 1000,
voicing_threshold=0.6,
pitch_floor=f0_min,
pitch_ceiling=f0_max,
)
.selected_array["frequency"]
)
pad_size = (p_len - len(f0) + 1) // 2
if pad_size > 0 or p_len - len(f0) - pad_size > 0:
f0 = np.pad(
f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
)
elif f0_method == "harvest":
print("not implemented. use harvest")
f0, t = pyworld.harvest(
audio.astype(np.double),
fs=self.sr,
@@ -62,21 +48,17 @@ class VC(object):
f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame)
)
else:
print("[Voice Changer] invalid f0 detector, use pm.", f0_method)
f0 = (
parselmouth.Sound(audio, self.sr)
.to_pitch_ac(
time_step=time_step / 1000,
voicing_threshold=0.6,
pitch_floor=f0_min,
pitch_ceiling=f0_max,
f0, t = pyworld.harvest(
audio.astype(np.double),
fs=self.sr,
f0_ceil=f0_max,
frame_period=10,
)
.selected_array["frequency"]
)
pad_size = (p_len - len(f0) + 1) // 2
if pad_size > 0 or p_len - len(f0) - pad_size > 0:
f0 = pyworld.stonemask(audio.astype(np.double), f0, t, self.sr)
f0 = signal.medfilt(f0, 3)
f0 = np.pad(
f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame)
)
f0 *= pow(2, f0_up_key / 12)