voice-changer/server/voice_changer/RVC/pitchExtractor/onnxcrepe/loudness.py

74 lines
2.2 KiB
Python
Raw Normal View History

import warnings
import librosa
import numpy as np
from voice_changer.RVC.pitchExtractor import onnxcrepe
###############################################################################
# Constants
###############################################################################
# Minimum decibel level
MIN_DB = -100.
# Reference decibel level
REF_DB = 20.
###############################################################################
# A-weighted loudness
###############################################################################
def a_weighted(audio, sample_rate, hop_length=None, pad=True):
"""Retrieve the per-frame loudness"""
# Default hop length of 10 ms
hop_length = sample_rate // 100 if hop_length is None else hop_length
# Convert to numpy
audio = audio.squeeze(0)
# Resample
if sample_rate != onnxcrepe.SAMPLE_RATE:
audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=onnxcrepe.SAMPLE_RATE)
hop_length = int(hop_length * onnxcrepe.SAMPLE_RATE / sample_rate)
# Cache weights
if not hasattr(a_weighted, 'weights'):
a_weighted.weights = perceptual_weights()
# Take stft
stft = librosa.stft(audio,
n_fft=onnxcrepe.WINDOW_SIZE,
hop_length=hop_length,
win_length=onnxcrepe.WINDOW_SIZE,
center=pad,
pad_mode='constant')
# Compute magnitude on db scale
db = librosa.amplitude_to_db(np.abs(stft))
# Apply A-weighting
weighted = db + a_weighted.weights
# Threshold
weighted[weighted < MIN_DB] = MIN_DB
# Average over weighted frequencies
return weighted.mean(axis=0).astype(np.float32)[None]
def perceptual_weights():
"""A-weighted frequency-dependent perceptual loudness weights"""
frequencies = librosa.fft_frequencies(sr=onnxcrepe.SAMPLE_RATE,
n_fft=onnxcrepe.WINDOW_SIZE)
# A warning is raised for nearly inaudible frequencies, but it ends up
# defaulting to -100 db. That default is fine for our purposes.
with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)
return librosa.A_weighting(frequencies)[:, None] - REF_DB