# Source code for audiomate.corpus.preprocessing.pipeline.offline.spectral

import numpy as np
import scipy.fftpack as fft
import librosa
from librosa import filters
from librosa import util

from . import base

def stft_from_frames(frames, window='hann', dtype=np.complex64):
    """
    Variation of the librosa.core.stft function,
    that computes the short-time-fourier-transform from frames
    instead of from the signal.

    Args:
        frames (np.ndarray): 2-D array holding one frame per column.
            ``frames.shape[0]`` is used as window and FFT length,
            ``frames.shape[1]`` as the number of frames.
        window: Window specification handed to
            ``librosa.filters.get_window`` (default ``'hann'``).
        dtype (np.dtype): Complex dtype of the returned matrix.

    Returns:
        np.ndarray: STFT matrix of shape
        ``(1 + n_fft // 2, frames.shape[1])`` with the given dtype.
    """

    win_length = frames.shape[0]
    n_fft = win_length

    fft_window = filters.get_window(window, win_length, fftbins=True)

    # Reshape so that the window can be broadcast over all frame columns
    fft_window = fft_window.reshape((-1, 1))

    # Pre-allocate the STFT matrix (only the non-negative frequency bins)
    stft_matrix = np.empty((int(1 + n_fft // 2), frames.shape[1]),
                           dtype=dtype,
                           order='F')

    # How many columns can we fit within MAX_MEM_BLOCK?
    # Always process at least one column per block; a step of 0 would
    # make range() raise a ValueError.
    n_columns = max(1, int(util.MAX_MEM_BLOCK / (stft_matrix.shape[0] *
                                                 stft_matrix.itemsize)))

    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])

        # Full FFT along the frame axis, keeping only the first
        # 1 + n_fft // 2 bins
        stft_matrix[:, bl_s:bl_t] = fft.fft(fft_window *
                                            frames[:, bl_s:bl_t],
                                            axis=0)[:stft_matrix.shape[0]]

    return stft_matrix

class MelSpectrogram(base.OfflineComputation):
    """
    Computation step that extracts mel-spectrogram features from the given frames.

    Based on ``librosa.feature.melspectrogram``.

    Args:
        n_mels (int): Number of mel bands to generate.
    """

    def __init__(self, n_mels=128, parent=None, name=None):
        super(MelSpectrogram, self).__init__(parent=parent, name=name)

        self.n_mels = n_mels

    def compute(self, frames, sampling_rate, corpus=None, utterance=None):
        """
        Compute the mel-spectrogram of the given frames.

        Args:
            frames (np.ndarray): 2-D array with one frame per row; it is
                transposed before being handed to ``stft_from_frames``.
            sampling_rate (int): Sampling rate passed to librosa as ``sr``.
            corpus: Not used by this step.
            utterance: Not used by this step.

        Returns:
            np.ndarray: The transposed output of
            ``librosa.feature.melspectrogram`` (presumably one row per
            frame and ``n_mels`` columns).
        """
        # Power spectrum = squared magnitude of the STFT
        power_spec = np.abs(stft_from_frames(frames.T)) ** 2
        mel = librosa.feature.melspectrogram(S=power_spec,
                                             n_mels=self.n_mels,
                                             sr=sampling_rate)

        return mel.T
class MFCC(base.OfflineComputation):
    """
    Computation step that extracts mfcc features from the given frames.

    Based on ``librosa.feature.mfcc``.

    Args:
        n_mels (int): Number of mel bands to generate.
        n_mfcc (int): number of MFCCs to return.
    """

    def __init__(self, n_mfcc=13, n_mels=128, parent=None, name=None):
        super(MFCC, self).__init__(parent=parent, name=name)

        self.n_mfcc = n_mfcc
        self.n_mels = n_mels

    def compute(self, frames, sampling_rate, corpus=None, utterance=None):
        """
        Compute the MFCCs of the given frames.

        Args:
            frames (np.ndarray): 2-D array with one frame per row; it is
                transposed before being handed to ``stft_from_frames``.
            sampling_rate (int): Sampling rate passed to librosa as ``sr``.
            corpus: Not used by this step.
            utterance: Not used by this step.

        Returns:
            np.ndarray: The transposed output of ``librosa.feature.mfcc``
            (presumably one row per frame and ``n_mfcc`` columns).
        """
        # Power spectrum = squared magnitude of the STFT
        power_spec = np.abs(stft_from_frames(frames.T)) ** 2
        mel = librosa.feature.melspectrogram(S=power_spec,
                                             n_mels=self.n_mels,
                                             sr=sampling_rate)

        # MFCCs are computed from the mel spectrogram on a dB scale
        mel_power = librosa.power_to_db(mel)
        mfcc = librosa.feature.mfcc(S=mel_power, n_mfcc=self.n_mfcc)

        return mfcc.T