voice-changer/server/voice_changer/RVC/pitchExtractor/onnxcrepe/threshold.py
w-okada d5561c2212 New Feature:
- Add Crepe Full/Tiny (onnx)
 - remove test connect for local
Refactor:
 - RVC: comment out module importer
2023-07-07 02:17:29 +09:00

130 lines
3.8 KiB
Python

import numpy as np
from voice_changer.RVC.pitchExtractor import onnxcrepe
###############################################################################
# Pitch thresholding methods
###############################################################################
class At:
    """Threshold pitch at a fixed periodicity cutoff.

    Frames whose periodicity is strictly below ``value`` are replaced with
    ``onnxcrepe.UNVOICED``; every other frame is passed through untouched.
    """

    def __init__(self, value):
        # Periodicity cutoff below which a frame is treated as unvoiced
        self.value = value

    def __call__(self, pitch, periodicity):
        """Return a thresholded copy of ``pitch``; the input is not modified."""
        # Work on a copy so the caller's array stays intact
        thresholded = pitch.copy()

        # Mask of frames that fall under the cutoff
        too_quiet = periodicity < self.value
        thresholded[too_quiet] = onnxcrepe.UNVOICED

        return thresholded
class Hysteresis:
    """Hysteresis thresholding of pitch by periodicity.

    A per-frame threshold is built in whitened log2-pitch space: frames whose
    pitch lies further than ``stds`` standard deviations from the mean need a
    higher periodicity to be kept. A voiced region that is entered from an
    unvoiced frame is only kept if at least one of its frames exceeds
    ``upper_bound``; otherwise the whole region is forced unvoiced.
    """

    def __init__(self,
                 lower_bound=.19,
                 upper_bound=.31,
                 width=.2,
                 stds=1.7,
                 return_threshold=False):
        # Periodicity below this is always unvoiced; also the base threshold
        self.lower_bound = lower_bound
        # Periodicity a region must exceed at least once to be kept
        self.upper_bound = upper_bound
        # Curvature of the parabola that raises the threshold for outliers
        self.width = width
        # Distance from the mean (in standard deviations) where the
        # parabola starts raising the threshold
        self.stds = stds
        # When true, __call__ also returns the per-frame threshold
        self.return_threshold = return_threshold

    def __call__(self, pitch, periodicity):
        """Apply hysteresis thresholding.

        Arguments
            pitch: array of pitch values in Hz; flattened internally.
            periodicity: array of periodicity values with the same number of
                elements as ``pitch``; flattened internally.

        Returns
            The thresholded pitch in Hz with shape ``(1, n)``, where removed
            frames are set via ``onnxcrepe.UNVOICED`` (presumably NaN, given
            the nan-aware statistics used here - confirm). If
            ``return_threshold`` is set, a ``(pitch, threshold)`` tuple is
            returned, with ``threshold`` of shape ``(n,)``.
        """
        # Perform hysteresis in log-2 space (flatten() returns a copy, so the
        # caller's arrays are never modified)
        pitch = np.log2(pitch).flatten()

        # Flatten periodicity
        periodicity = periodicity.flatten()

        # Ignore confidently unvoiced pitch
        pitch[periodicity < self.lower_bound] = onnxcrepe.UNVOICED

        # Whiten pitch
        mean, std = np.nanmean(pitch), np.nanstd(pitch)

        # A zero standard deviation (all voiced frames share one pitch, or
        # only a single voiced frame remains) would make the whitening 0 / 0
        # and turn every frame into NaN. Use a unit scale in that case so the
        # whitened pitch is 0 and survives the round trip. The comparison is
        # also False for a NaN std (no voiced frames), which leaves the
        # all-NaN result unchanged.
        scale = std if std > 0 else 1.
        pitch = (pitch - mean) / scale

        # Require high confidence to make predictions far from the mean
        parabola = self.width * pitch ** 2 - self.width * self.stds ** 2
        threshold = \
            self.lower_bound + np.clip(parabola, 0, 1 - self.lower_bound)

        # Unvoiced frames produce a NaN threshold; fall back to the base value
        threshold[np.isnan(threshold)] = self.lower_bound

        # Apply hysteresis to prevent short, unconfident voiced regions
        n_frames = len(periodicity)
        i = 0
        while i < n_frames - 1:

            # Detect unvoiced to voiced transition
            if periodicity[i] < threshold[i] and \
                    periodicity[i + 1] > threshold[i + 1]:

                # Grow region until next unvoiced or end of array
                start, end, keep = i + 1, i + 1, False
                while end < n_frames and periodicity[end] > threshold[end]:
                    if periodicity[end] > self.upper_bound:
                        keep = True
                    end += 1

                # Force unvoiced if we didn't pass the confidence required by
                # the hysteresis
                if not keep:
                    threshold[start:end] = 1

                # Resume scanning at the frame that ended the region
                i = end

            else:
                i += 1

        # Remove pitch with low periodicity
        pitch[periodicity < threshold] = onnxcrepe.UNVOICED

        # Unwhiten (same scale as used for whitening)
        pitch = pitch * scale + mean

        # Convert to Hz and restore a leading axis of length one
        pitch = np.array(2 ** pitch)[None, :]

        # Optionally return threshold
        if self.return_threshold:
            return pitch, np.array(threshold)

        return pitch
###############################################################################
# Periodicity thresholding methods
###############################################################################
class Silence:
    """Set periodicity to zero in silent regions.

    A frame counts as silent when its A-weighted loudness, as returned by
    ``onnxcrepe.loudness.a_weighted``, is below ``value``.
    """

    def __init__(self, value=-60):
        # Loudness cutoff (presumably in dB - confirm against
        # onnxcrepe.loudness.a_weighted)
        self.value = value

    def __call__(self,
                 periodicity,
                 audio,
                 sample_rate=onnxcrepe.SAMPLE_RATE,
                 precision=None,
                 pad=True):
        """Return a copy of ``periodicity`` with silent frames zeroed.

        Arguments
            periodicity: array of per-frame periodicity values.
            audio: audio signal, forwarded to the loudness computation.
            sample_rate: sampling rate of ``audio``.
            precision: hop between frames in milliseconds. ``None`` selects
                a 10 ms hop.
            pad: forwarded unchanged to ``onnxcrepe.loudness.a_weighted``.

        Returns
            A new array; the input ``periodicity`` is not modified.
        """
        # Don't modify in-place
        periodicity = periodicity.copy()

        # The declared default of None cannot be multiplied below, so map it
        # to a concrete hop first.
        # NOTE(review): 10 ms is assumed to be the hop used by the rest of
        # onnxcrepe when precision is None - confirm.
        if precision is None:
            precision = 10

        # Hop in samples; cast so a float precision still yields a whole
        # sample count
        hop_length = int(sample_rate * precision // 1000)

        # Compute loudness
        loudness = onnxcrepe.loudness.a_weighted(
            audio, sample_rate, hop_length, pad)

        # Threshold silence
        periodicity[loudness < self.value] = 0.

        return periodicity