# Source code for audiomate.corpus.assets.utterance

import collections

import numpy as np

from audiomate.utils import units
from . import label


class Utterance(object):
    """
    An utterance defines a sample of audio. It is part of a file or can span over the whole file.

    Args:
        idx (str): A unique identifier for the utterance within a dataset.
        file (File): The file this utterance is belonging to.
        issuer (Issuer): The issuer this utterance was created from.
        start (float): The start of the utterance within the audio file in seconds. (default 0)
        end (float): The end of the utterance within the audio file in seconds. -1 indicates that
                     the utterance ends at the end of the file. (default -1)
        label_lists (LabelList, list): A single or multiple label-lists.

    Attributes:
        idx (str): The identifier passed to the constructor.
        file (File): The file the samples are read from.
        issuer (Issuer): The issuer, or None. If given, the utterance adds itself to ``issuer.utterances``.
        start (float): Start of the utterance within the file in seconds.
        end (float): End of the utterance within the file in seconds, or -1 for "end of file".
        label_lists (dict): A dictionary containing label-lists with the label-list-idx as key.
    """

    __slots__ = ['idx', 'file', 'issuer', 'start', 'end', 'label_lists']

    def __init__(self, idx, file, issuer=None, start=0, end=-1, label_lists=None):
        self.idx = idx
        self.file = file
        self.issuer = issuer
        self.start = start
        self.end = end
        self.label_lists = {}

        if label_lists is not None:
            self.set_label_list(label_lists)

        # Keep the back-reference on the issuer in sync with this utterance.
        if self.issuer is not None:
            self.issuer.utterances.add(self)

    @property
    def end_abs(self):
        """
        Return the absolute end of the utterance relative to the signal.

        If ``end`` is -1 the duration of the file is returned, otherwise ``end`` itself.
        """
        if self.end == -1:
            return self.file.duration

        return self.end

    @property
    def duration(self):
        """
        Return the absolute duration in seconds (``end_abs - start``).
        """
        return self.end_abs - self.start

    def num_samples(self, sr=None):
        """
        Return the number of samples.

        Args:
            sr (int): Calculate the number of samples with the given sampling-rate.
                      If None use the native sampling-rate.

        Returns:
            int: Number of samples
        """
        native_sr = self.sampling_rate
        num_samples = units.seconds_to_sample(self.duration, native_sr)

        if sr is not None:
            # Scale the native sample count to the requested rate, rounding up.
            ratio = float(sr) / native_sr
            num_samples = int(np.ceil(num_samples * ratio))

        return num_samples

    #
    #   Signal
    #

    def read_samples(self, sr=None, offset=0, duration=None):
        """
        Read the samples of the utterance.

        Args:
            sr (int): If None uses the sampling rate given by the file, otherwise resamples to the given sampling rate.
            offset (float): Offset in seconds to read samples from.
            duration (float): If not None read only this number of seconds in maximum.

        Returns:
            np.ndarray: A numpy array containing the samples as a floating point (numpy.float32) time series.
        """

        # ``None`` means "no upper bound": the utterance has no explicit end, so the file is
        # read from the start position up to its end unless ``duration`` limits it.
        read_duration = self.duration if self.end >= 0 else None

        # Only a bounded utterance has a remaining length that shrinks with the offset.
        if read_duration is not None and offset > 0:
            read_duration -= offset

        if duration is not None:
            if read_duration is None:
                read_duration = duration
            else:
                read_duration = min(duration, read_duration)

        return self.file.read_samples(sr=sr, offset=self.start + offset, duration=read_duration)

    @property
    def sampling_rate(self):
        """
        Return the sampling rate (taken from the file of the utterance).
        """
        return self.file.sampling_rate

    #
    #   Labels
    #

    def set_label_list(self, label_lists):
        """
        Set the given label-list for this utterance. If the label-list-idx is not set, ``default`` is used.
        If there is already a label-list with the given idx, it will be overridden.

        Args:
            label_lists (LabelList, list): A single or multiple label-lists to add.

        """

        # Accept a single label-list as well as an iterable of them.
        if isinstance(label_lists, label.LabelList):
            label_lists = [label_lists]

        for label_list in label_lists:
            if label_list.idx is None:
                label_list.idx = 'default'

            label_list.utterance = self
            self.label_lists[label_list.idx] = label_list

    def all_label_values(self, label_list_ids=None):
        """
        Return a set of all label-values occurring in this utterance.

        Args:
            label_list_ids (list): If not None, only label-values from label-lists with an id contained in this list
                                   are considered.

        Returns:
             set: A set of distinct label-values.
        """
        values = set()

        for label_list in self.label_lists.values():
            if label_list_ids is None or label_list.idx in label_list_ids:
                values.update(label_list.label_values())

        return values

    def label_count(self, label_list_ids=None):
        """
        Return a dictionary containing the number of times, every label-value in this utterance is occurring.

        Args:
            label_list_ids (list): If not None, only labels from label-lists with an id contained in this list
                                   are considered.

        Returns:
            dict: A dictionary containing the number of occurrences with the label-value as key.
        """
        count = collections.defaultdict(int)

        for label_list in self.label_lists.values():
            if label_list_ids is None or label_list.idx in label_list_ids:
                for label_value, label_count in label_list.label_count().items():
                    count[label_value] += label_count

        return count

    def label_total_duration(self, label_list_ids=None):
        """
        Return a dictionary containing the number of seconds, every label-value is occurring in this utterance.

        Args:
            label_list_ids (list): If not None, only labels from label-lists with an id contained in this list
                                   are considered.

        Returns:
            dict: A dictionary containing the number of seconds with the label-value as key.
        """
        duration = collections.defaultdict(float)

        for label_list in self.label_lists.values():
            if label_list_ids is None or label_list.idx in label_list_ids:
                for label_value, label_duration in label_list.label_total_duration().items():
                    duration[label_value] += label_duration

        return duration