Source code for audiomate.utils.audio

import librosa
from audiomate.utils import audioread
import numpy as np
import scipy


def process_buffer(buffer, n_channels):
    """
    Join the buffered blocks into one array and downmix it to mono.

    Args:
        buffer (list): Blocks of samples, in the order they were read.
        n_channels (int): The number of channels of the input data.

    Returns:
        np.array: The concatenated samples. If ``n_channels`` is greater
        than 1 the result of ``librosa.to_mono`` is returned instead.
    """
    merged = np.concatenate(buffer)

    # Nothing to downmix for single-channel data.
    if n_channels <= 1:
        return merged

    # Every run of ``n_channels`` consecutive values becomes one column,
    # giving an array of shape (n_channels, n_values / n_channels).
    per_channel = merged.reshape((-1, n_channels)).T
    return librosa.to_mono(per_channel)
def read_blocks(file_path, start=0.0, end=float('inf'), buffer_size=5760000):
    """
    Read an audio file block by block, yielding one block at a time.

    Args:
        file_path (str): Path to the file to read.
        start (float): Start in seconds to read from.
        end (float): End in seconds to read to.
                     ``inf`` means to the end of the file.
        buffer_size (int): Number of samples to collect before a block is
                           yielded. Samples are counted before the buffer
                           is passed to ``process_buffer``. Only whole
                           chunks of the underlying reader are collected,
                           so a block can be somewhat larger than this.

    Returns:
        Generator: A generator yielding the samples for every block.
    """
    pending = []
    pending_size = 0
    consumed = 0

    with audioread.audio_open(file_path) as audio_file:
        channels = audio_file.channels
        native_rate = audio_file.samplerate

        # Boundaries are expressed in raw values, i.e. frames * channels.
        first_sample = int(np.round(native_rate * start)) * channels

        if end == np.inf:
            last_sample = end
        else:
            last_sample = int(np.round(native_rate * end)) * channels

        for raw in audio_file:
            chunk = librosa.util.buf_to_float(raw)
            chunk_begin = consumed
            consumed = chunk_begin + len(chunk)

            # Chunk lies completely before the requested range.
            if consumed < first_sample:
                continue

            # Chunk lies completely behind the requested range.
            if chunk_begin > last_sample:
                break

            # Cut off everything behind the requested end ...
            if consumed > last_sample:
                chunk = chunk[:last_sample - chunk_begin]

            # ... and everything in front of the requested start.
            if chunk_begin <= first_sample <= consumed:
                chunk = chunk[first_sample - chunk_begin:]

            pending.append(chunk)
            pending_size += len(chunk)

            if pending_size >= buffer_size:
                yield process_buffer(pending, channels)
                pending = []
                pending_size = 0

        # Flush whatever did not fill a complete buffer.
        if pending:
            yield process_buffer(pending, channels)
def read_frames(file_path, frame_size, hop_size, start=0.0,
                end=float('inf'), buffer_size=5760000):
    """
    Read an audio file frame by frame. The frames are yielded one after
    another.

    Args:
        file_path (str): Path to the file to read.
        frame_size (int): The number of samples per frame.
        hop_size (int): The number of samples between two frames.
        start (float): Start in seconds to read from.
        end (float): End in seconds to read to.
                     ``inf`` means to the end of the file.
        buffer_size (int): Passed on to ``read_blocks``; number of samples
                           to load into memory at once.

    Returns:
        Generator: A generator yielding a tuple for every frame.
        The first item is the frame and the second a boolean indicating
        if it is the last frame. The last frame is zero-padded to
        ``frame_size`` samples.

    Raises:
        ValueError: If ``frame_size`` or ``hop_size`` is smaller than 1.
                    As this is a generator, the error is raised when the
                    first frame is requested.
    """
    if frame_size < 1:
        raise ValueError(
            'frame_size has to be at least 1, got {}'.format(frame_size)
        )

    # A hop of zero or less would never advance and loop forever.
    if hop_size < 1:
        raise ValueError(
            'hop_size has to be at least 1, got {}'.format(hop_size)
        )

    rest_samples = np.array([], dtype=np.float32)

    # Offset of the next frame start within the upcoming block. Non-zero
    # only if a hop jumped past the end of the previous block, which can
    # happen when hop_size > frame_size.
    next_start = 0

    for block in read_blocks(file_path, start=start, end=end,
                             buffer_size=buffer_size):
        # Prepend rest samples from previous block
        block = np.concatenate([rest_samples, block])
        current_sample = next_start

        # A frame ending exactly at the end of the block is held back
        # (strict ``<``), so it can be flagged as the last frame if no
        # further samples follow.
        while current_sample + frame_size < block.size:
            yield block[current_sample:current_sample + frame_size], False
            current_sample += hop_size

        if current_sample < block.size:
            # Store rest samples for next block
            rest_samples = block[current_sample:]
            next_start = 0
        else:
            # The hop went beyond this block. Remember how far, so frame
            # positions do not drift at block boundaries.
            rest_samples = block[:0]
            next_start = current_sample - block.size

    if rest_samples.size > 0:
        rest_samples = np.pad(
            rest_samples,
            (0, frame_size - rest_samples.size),
            mode='constant',
            constant_values=0
        )
        yield rest_samples, True
def write_wav(path, samples, sr=16000):
    """
    Write the given samples to a 16-bit PCM wav file.

    The samples are expected to be floating point numbers in the range of
    -1.0 to 1.0. Values are scaled by 32768 and clipped to the int16
    range, so a sample of exactly 1.0 (or anything outside the expected
    range) saturates instead of wrapping around.

    Args:
        path (str): The path to write the wav to.
        samples (np.array): A float array.
        sr (int): The sampling rate.
    """
    # Imported locally so the ``wavfile`` submodule is guaranteed to be
    # loaded; a bare ``import scipy`` does not promise that.
    from scipy.io import wavfile

    int16_info = np.iinfo(np.int16)
    max_value = np.abs(int16_info.min)

    scaled = np.asarray(samples) * max_value

    # 1.0 * 32768 does not fit into int16, hence clip before casting.
    data = np.clip(scaled, int16_info.min, int16_info.max).astype(np.int16)
    wavfile.write(path, sr, data)