voice-changer/server/voice_changer/RVC/pitchExtractor/onnxcrepe/loudness.py

import warnings

import librosa
import numpy as np
from voice_changer.RVC.pitchExtractor import onnxcrepe


###############################################################################
# Constants
###############################################################################


# Floor, in decibels, applied to every weighted spectrogram bin in
# a_weighted() before the bins are averaged
MIN_DB = -100.0

# Reference level, in decibels, subtracted from the A-weighting curve in
# perceptual_weights()
REF_DB = 20.0


###############################################################################
# A-weighted loudness
###############################################################################


def a_weighted(audio, sample_rate, hop_length=None, pad=True):
    """Compute the A-weighted loudness of each frame of `audio`.

    Arguments
        audio
            The signal to analyse. Its leading axis is removed with
            `squeeze(0)`; presumably a NumPy array of shape (1, samples) -
            confirm against the caller.
        sample_rate
            Sampling rate of `audio`. Audio at any rate other than
            `onnxcrepe.SAMPLE_RATE` is resampled to that rate first.
        hop_length
            Hop between frames, in samples at `sample_rate`. Defaults to
            `sample_rate // 100` (10 ms).
        pad
            Forwarded to `librosa.stft` as `center`.

    Returns
        A float32 array holding the mean thresholded, A-weighted decibel
        level of every frame, with a new leading axis of length 1.
    """
    # Fall back to a 10 ms hop when the caller did not pick one
    if hop_length is None:
        hop_length = sample_rate // 100

    # Drop the leading axis
    signal = audio.squeeze(0)

    # Bring the signal to the model sample rate, scaling the hop by the same
    # ratio (int() truncates the scaled hop)
    model_rate = onnxcrepe.SAMPLE_RATE
    if sample_rate != model_rate:
        signal = librosa.resample(signal,
                                  orig_sr=sample_rate,
                                  target_sr=model_rate)
        hop_length = int(hop_length * model_rate / sample_rate)

    # The weights are computed on first use and then kept as an attribute of
    # this function so later calls can reuse them
    try:
        weights = a_weighted.weights
    except AttributeError:
        weights = a_weighted.weights = perceptual_weights()

    # Short-time Fourier transform, one window of WINDOW_SIZE per frame
    window_size = onnxcrepe.WINDOW_SIZE
    spectrum = librosa.stft(signal,
                            n_fft=window_size,
                            hop_length=hop_length,
                            win_length=window_size,
                            center=pad,
                            pad_mode='constant')

    # Magnitudes in decibels, offset by the per-frequency weights
    magnitude_db = librosa.amplitude_to_db(np.abs(spectrum))
    weighted = magnitude_db + weights

    # Raise anything below the floor up to MIN_DB, in place
    np.maximum(weighted, MIN_DB, out=weighted)

    # Collapse the frequency axis and restore a leading axis
    frame_loudness = weighted.mean(axis=0)
    return frame_loudness.astype(np.float32)[None]


def perceptual_weights():
    """Build the A-weighting curve used to weight each frequency bin.

    Returns
        A column array (one row per FFT bin for `onnxcrepe.WINDOW_SIZE` at
        `onnxcrepe.SAMPLE_RATE`) holding the librosa A-weighting of each bin
        frequency minus `REF_DB`.
    """
    bin_frequencies = librosa.fft_frequencies(
        sr=onnxcrepe.SAMPLE_RATE,
        n_fft=onnxcrepe.WINDOW_SIZE)

    # Nearly inaudible frequencies trigger a RuntimeWarning, but the result
    # for them ends up at -100 db, which is fine for our purposes. Silence
    # the warning for the duration of the computation only.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        curve = librosa.A_weighting(bin_frequencies)
        column = curve[:, None]
        return column - REF_DB
New Feature: - Add Crepe Full/Tiny (onnx) - remove test connect for local Refactor: - RVC: comment out module importer 2023-07-06 20:17:29 +03:00			`import warnings`

			`import librosa`
			`import numpy as np`
			`from voice_changer.RVC.pitchExtractor import onnxcrepe`


			`###############################################################################`
			`# Constants`
			`###############################################################################`


			`# Minimum decibel level`
			`MIN_DB = -100.`

			`# Reference decibel level`
			`REF_DB = 20.`


			`###############################################################################`
			`# A-weighted loudness`
			`###############################################################################`


			`def a_weighted(audio, sample_rate, hop_length=None, pad=True):`
			`"""Retrieve the per-frame loudness"""`

			`# Default hop length of 10 ms`
			`hop_length = sample_rate // 100 if hop_length is None else hop_length`

			`# Convert to numpy`
			`audio = audio.squeeze(0)`

			`# Resample`
			`if sample_rate != onnxcrepe.SAMPLE_RATE:`
			`audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=onnxcrepe.SAMPLE_RATE)`
			`hop_length = int(hop_length * onnxcrepe.SAMPLE_RATE / sample_rate)`

			`# Cache weights`
			`if not hasattr(a_weighted, 'weights'):`
			`a_weighted.weights = perceptual_weights()`

			`# Take stft`
			`stft = librosa.stft(audio,`
			`n_fft=onnxcrepe.WINDOW_SIZE,`
			`hop_length=hop_length,`
			`win_length=onnxcrepe.WINDOW_SIZE,`
			`center=pad,`
			`pad_mode='constant')`

			`# Compute magnitude on db scale`
			`db = librosa.amplitude_to_db(np.abs(stft))`

			`# Apply A-weighting`
			`weighted = db + a_weighted.weights`

			`# Threshold`
			`weighted[weighted < MIN_DB] = MIN_DB`

			`# Average over weighted frequencies`
			`return weighted.mean(axis=0).astype(np.float32)[None]`


			`def perceptual_weights():`
			`"""A-weighted frequency-dependent perceptual loudness weights"""`
			`frequencies = librosa.fft_frequencies(sr=onnxcrepe.SAMPLE_RATE,`
			`n_fft=onnxcrepe.WINDOW_SIZE)`

			`# A warning is raised for nearly inaudible frequencies, but it ends up`
			`# defaulting to -100 db. That default is fine for our purposes.`
			`with warnings.catch_warnings():`
			`warnings.simplefilter('ignore', RuntimeWarning)`
			`return librosa.A_weighting(frequencies)[:, None] - REF_DB`