# Source code for audiomate.processing.pipeline.rhythm

import librosa
import numpy as np

from . import base
from . import spectral


class Tempogram(base.Computation):
    """
    Computation step to compute a tempogram from the onset strength envelope.

    Based on http://librosa.github.io/librosa/generated/librosa.feature.tempogram.html

    The step is stateful: onset-strength values that have not yet been consumed by
    a complete tempogram window are kept in ``rest`` and prepended to the
    values of the following chunk.

    Args:
        n_mels (int): Number of mel bands to generate.
        win_length (int): Length of the onset autocorrelation window (in frames/onset measurements).
                          The default settings (384) corresponds to 384 * hop_length / sr ~= 8.9s.
        parent: Forwarded unchanged to ``base.Computation``.
        name: Forwarded unchanged to ``base.Computation``.
    """

    def __init__(self, n_mels=128, win_length=384, parent=None, name=None):
        # NOTE(review): one frame of left context is requested, presumably because the
        # onset strength is derived from consecutive frames -- confirm against librosa.
        super(Tempogram, self).__init__(min_frames=win_length, left_context=1, right_context=0,
                                        parent=parent, name=name)

        self.n_mels = n_mels
        self.win_length = win_length

        # Onset-strength values carried over from previous chunks (None before the first chunk).
        self.rest = None

    def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
        """
        Compute the tempogram frames that become available with the given chunk.

        Args:
            chunk: Chunk to process. The attributes ``data``, ``offset``,
                   ``left_context`` and ``is_last`` are read.
            sampling_rate (int): Sampling rate, passed on to librosa.
            corpus: Not used by this step.
            utterance: Not used by this step.

        Returns:
            The transposed output of ``librosa.feature.tempogram`` computed over
            all buffered onset values, or ``None`` if fewer than ``win_length``
            onset values are available so far.
        """
        # Cleanup rest if it's the first frame
        if chunk.offset == 0:
            self.rest = None

        # Compute mel-spectrogram
        power_spec = np.abs(spectral.stft_from_frames(chunk.data.T)) ** 2
        mel = np.abs(librosa.feature.melspectrogram(S=power_spec, n_mels=self.n_mels, sr=sampling_rate))
        mel_power = librosa.power_to_db(mel)

        # Compute onset strengths
        oenv = librosa.onset.onset_strength(S=mel_power, center=False)

        # Remove context, otherwise we have duplicate frames while online processing
        oenv = oenv[chunk.left_context:]

        if self.rest is not None:
            all_frames = np.concatenate([self.rest, oenv])
        else:
            # It's the first chunk --> pad to center tempogram windows at the beginning
            all_frames = np.pad(oenv, (self.win_length // 2, 0), mode='linear_ramp', end_values=0)

        if chunk.is_last:
            # It's the last chunk --> pad to center tempogram windows at end
            all_frames = np.pad(all_frames, (0, self.win_length // 2), mode='linear_ramp', end_values=0)

            # Compensate the 1 frame that is too much since we want win-len - 1 additional frames,
            # With an even win-len we would have win-len additional frames
            if self.win_length % 2 == 0:
                all_frames = all_frames[:-1]

        if all_frames.shape[0] < self.win_length:
            # Not enough onset values for a single window yet --> keep everything for the next chunk
            self.rest = all_frames
            return None

        tempogram = librosa.feature.tempogram(onset_envelope=all_frames, sr=sampling_rate,
                                              win_length=self.win_length, center=False).T

        # Drop as many leading onset values as tempogram rows were produced;
        # the remainder is reused for the next chunk.
        self.rest = all_frames[tempogram.shape[0]:]

        return tempogram