Source code for audiomate.processing.pipeline.rhythm

import librosa
import numpy as np

from . import base
from . import spectral


class Tempogram(base.Computation):
    """
    Computation step to compute a tempogram.

    Based on http://librosa.github.io/librosa/generated/librosa.feature.tempogram.html

    Args:
        n_mels (int): Number of mel bands to generate.
        win_length (int): Length of the onset autocorrelation window
                          (in frames/onset measurements). The default setting (384)
                          corresponds to 384 * hop_length / sr ~= 8.9s.
    """

    def __init__(self, n_mels=128, win_length=384, parent=None, name=None):
        super(Tempogram, self).__init__(min_frames=win_length, left_context=1, right_context=0,
                                        parent=parent, name=name)

        self.n_mels = n_mels
        self.win_length = win_length

        self.rest = None

    def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
        # Clean up the rest buffer if it's the first chunk
        if chunk.offset == 0:
            self.rest = None

        # Compute the mel power spectrogram from the framed chunk data
        power_spec = np.abs(spectral.stft_from_frames(chunk.data.T)) ** 2
        mel = np.abs(librosa.feature.melspectrogram(S=power_spec, n_mels=self.n_mels, sr=sampling_rate))
        mel_power = librosa.power_to_db(mel)

        # Compute onset strengths
        oenv = librosa.onset.onset_strength(S=mel_power, center=False)

        # Remove the context, otherwise we would produce duplicate frames during online processing
        oenv = oenv[chunk.left_context:]

        if self.rest is not None:
            all_frames = np.concatenate([self.rest, oenv])
        else:
            # It's the first chunk --> pad to center tempogram windows at the beginning
            all_frames = np.pad(oenv, (self.win_length // 2, 0), mode='linear_ramp', end_values=0)

        if chunk.is_last:
            # It's the last chunk --> pad to center tempogram windows at the end
            all_frames = np.pad(all_frames, (0, self.win_length // 2), mode='linear_ramp', end_values=0)

            # Compensate for the one frame too many: we want win_length - 1 additional frames,
            # but with an even win_length the padding above would add win_length additional frames.
            if self.win_length % 2 == 0:
                all_frames = all_frames[:-1]

        if all_frames.shape[0] >= self.win_length:
            tempogram = librosa.feature.tempogram(onset_envelope=all_frames, sr=sampling_rate,
                                                  win_length=self.win_length, center=False).T
            self.rest = all_frames[tempogram.shape[0]:]
            return tempogram
        else:
            self.rest = all_frames
            return None
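
For reference, the same kind of feature can be computed offline with librosa alone. The following is a minimal sketch of the non-streaming equivalent of the step above; it is not part of this module, the audio path is a placeholder, and win_length=384 simply mirrors the constructor default. Note that the streaming step returns the transposed tempogram (frames first).

    import librosa

    # Load an example recording (the path is a placeholder).
    y, sr = librosa.load('example.wav')

    # Onset strength envelope followed by the tempogram over a 384-frame window,
    # which is what the streaming step above produces chunk by chunk.
    oenv = librosa.onset.onset_strength(y=y, sr=sr)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, win_length=384)

    print(tempogram.shape)  # (win_length, number of onset frames)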