Source code for audiomate.corpus.base

import abc
import collections
import copy
import math

import numpy as np

from audiomate.utils import stats

class CorpusView(metaclass=abc.ABCMeta):
    """
    This class defines the basic interface of a corpus.
    It is not meant to be instantiated directly.
    It only describes the methods for accessing data of the corpus.

    Concrete views have to implement the abstract properties ``name``,
    ``tracks``, ``utterances``, ``issuers`` and ``feature_containers``.
    Everything else in this class is derived from those properties.

    Notes:
        All paths to files should be held as absolute paths in memory.
    """

    @property
    @abc.abstractmethod
    def name(self):
        """
        Return the name of the dataset (Equals basename of the path, if not None).
        """
        return 'undefined'

    #
    #   Tracks
    #

    @property
    @abc.abstractmethod
    def tracks(self):
        """
        Return the tracks in the corpus.

        Returns:
            dict: A dictionary containing :py:class:`audiomate.track.Track`
            objects with the track-idx as key.
        """
        return {}

    @property
    def num_tracks(self):
        """ Return number of tracks. """
        return len(self.tracks)

    def contains_track(self, track):
        """
        Return ``True`` if the given track is in the corpus already,
        ``False`` otherwise.

        A track counts as contained only if its idx is registered **and**
        the registered object does not compare unequal to ``track``.
        """
        # Read the property once; subclasses may compute it on access.
        tracks = self.tracks

        if track.idx not in tracks:
            return False

        if track != tracks[track.idx]:
            return False

        return True

    #
    #   Utterances
    #

    @property
    @abc.abstractmethod
    def utterances(self):
        """
        Return the utterances in the corpus.

        Returns:
            dict: A dictionary containing :py:class:`audiomate.corpus.assets.Utterance`
            objects with the utterance-idx as key.
        """
        return {}

    @property
    def num_utterances(self):
        """ Return number of utterances. """
        return len(self.utterances)

    #
    #   Issuers
    #

    @property
    @abc.abstractmethod
    def issuers(self):
        """
        Return the issuers in the corpus.

        Returns:
            dict: A dictionary containing :py:class:`audiomate.issuers.Issuer`
            objects with the issuer-idx as key.
        """
        return {}

    @property
    def num_issuers(self):
        """ Return the number of issuers in the corpus. """
        return len(self.issuers)

    def contains_issuer(self, issuer):
        """
        Return ``True`` if the given issuer is in the corpus already,
        ``False`` otherwise.

        An issuer counts as contained only if its idx is registered **and**
        the registered object does not compare unequal to ``issuer``.
        """
        # Read the property once; subclasses may compute it on access.
        issuers = self.issuers

        if issuer.idx not in issuers:
            return False

        if issuer != issuers[issuer.idx]:
            return False

        return True

    #
    #   Feature Container
    #

    @property
    @abc.abstractmethod
    def feature_containers(self):
        """
        Return the feature-containers in the corpus.

        Returns:
            dict: A dictionary containing :py:class:`audiomate.container.FeatureContainer`
            objects with the feature-idx as key.
        """
        return {}

    @property
    def num_feature_containers(self):
        """ Return the number of feature-containers in the corpus. """
        return len(self.feature_containers)

    #
    #   Subviews
    #

    @property
    def subviews(self):
        """
        Return the subviews of the corpus.

        Returns:
            dict: A dictionary containing :py:class:`audiomate.corpus.Subview`
            objects with the subview-idx as key.
        """
        return {}

    @property
    def num_subviews(self):
        """ Return the number of subviews in the corpus. """
        return len(self.subviews)

    #
    #   Labels
    #

    def all_label_values(self, label_list_ids=None):
        """
        Return a set of all label-values occurring in this corpus.

        Args:
            label_list_ids (list): If not None, only labels from label-lists
                                   with an id contained in this list
                                   are considered.

        Returns:
            :class:`set`: A set of distinct label-values.
        """
        values = set()

        for utterance in self.utterances.values():
            # In-place update avoids rebuilding the whole set per utterance.
            values.update(utterance.all_label_values(label_list_ids=label_list_ids))

        return values

    def label_count(self, label_list_ids=None):
        """
        Return a dictionary containing the number of times
        every label-value occurs in this corpus.

        Args:
            label_list_ids (list): If not None, only labels from label-lists
                                   with an id contained in this list
                                   are considered.

        Returns:
            dict: A dictionary containing the number of occurrences
            with the label-value as key.
        """
        count = collections.defaultdict(int)

        for utterance in self.utterances.values():
            utt_counts = utterance.label_count(label_list_ids=label_list_ids)

            for label_value, utt_count in utt_counts.items():
                count[label_value] += utt_count

        return count

    def label_durations(self, label_list_ids=None):
        """
        Return a dictionary containing the total duration
        every label-value occurs in this corpus.

        Args:
            label_list_ids (list): If not None, only labels from label-lists
                                   with an id contained in this list
                                   are considered.

        Returns:
            dict: A dictionary containing the total duration
            with the label-value as key.
        """
        # Durations are accumulated, so the neutral element is a float.
        duration = collections.defaultdict(float)

        for utterance in self.utterances.values():
            utt_durations = utterance.label_total_duration(label_list_ids=label_list_ids)

            for label_value, utt_duration in utt_durations.items():
                duration[label_value] += utt_duration

        return duration

    def all_tokens(self, delimiter=' ', label_list_ids=None):
        """
        Return a set of all tokens occurring in one of the labels in the corpus.

        Args:
            delimiter (str): The delimiter used to split labels into tokens
                             (see :meth:`audiomate.annotations.Label.tokenized`).
            label_list_ids (list): If not None, only labels from label-lists
                                   with an idx contained in this list
                                   are considered.

        Returns:
            :class:`set`: A set of distinct tokens.
        """
        tokens = set()

        for utterance in self.utterances.values():
            # In-place update avoids rebuilding the whole set per utterance.
            tokens.update(utterance.all_tokens(delimiter=delimiter,
                                               label_list_ids=label_list_ids))

        return tokens

    #
    #   Data
    #

    @property
    def total_duration(self):
        """
        Return the total amount of audio summed over all utterances
        in the corpus in seconds.
        """
        return sum(utterance.duration for utterance in self.utterances.values())

    def stats(self):
        """
        Return statistics calculated over all samples of all utterances
        in the corpus.

        Returns:
            DataStats: A DataStats object containing statistics over all
            samples in the corpus.
        """
        per_utt_stats = self.stats_per_utterance()

        return stats.DataStats.concatenate(per_utt_stats.values())

    def stats_per_utterance(self):
        """
        Return statistics calculated for all samples of each utterance
        in the corpus.

        The samples of every utterance are read via ``read_samples()``.

        Returns:
            dict: A dictionary containing a DataStats object for each utt,
            with the utterance-idx as key.
        """
        all_stats = {}

        for utterance in self.utterances.values():
            data = utterance.read_samples()

            # Positional order: mean, variance, minimum, maximum, sample count.
            all_stats[utterance.idx] = stats.DataStats(float(np.mean(data)),
                                                       float(np.var(data)),
                                                       np.min(data),
                                                       np.max(data),
                                                       data.size)

        return all_stats

    #
    #   Restructuring
    #

    def split_utterances_to_max_time(self, max_time=60.0, overlap=0.0):
        """
        Create a new corpus, where all the utterances are of given maximal
        duration. Utterances longer than ``max_time`` are split up into
        multiple utterances of equal length.

        .. warning::
            Subviews and FeatureContainers are not added to the newly
            created corpus.

        Arguments:
            max_time (float): Maximal duration for target utterances
                              in seconds. Has to be greater than zero.
            overlap (float): Amount of overlap in seconds.
                             The overlap is measured from the center
                             of the splitting. (The actual overlap of
                             two segments is 2 * overlap)

        Returns:
            Corpus: A new corpus instance.

        Raises:
            ValueError: If ``max_time`` is not greater than zero.
        """
        # Without this check a zero ``max_time`` ends in a ZeroDivisionError
        # and a negative one in a meaningless (negative) number of pieces.
        if max_time <= 0:
            raise ValueError(
                'max_time has to be greater than zero, got {}'.format(max_time)
            )

        # Imported here, not at module level.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from audiomate.corpus import Corpus

        result = Corpus()

        # Copy Tracks
        tracks = copy.deepcopy(list(self.tracks.values()))
        result.import_tracks(tracks)

        # Copy Issuers
        issuers = copy.deepcopy(list(self.issuers.values()))
        result.import_issuers(issuers)

        for utterance in self.utterances.values():
            orig_dur = utterance.duration

            if orig_dur > max_time:
                # Compute times where the utterance is split.
                # All pieces get the same length, which is <= max_time.
                num_sub_utts = math.ceil(orig_dur / max_time)
                sub_utt_dur = orig_dur / num_sub_utts
                cutting_points = [i * sub_utt_dur for i in range(1, num_sub_utts)]

                sub_utts = utterance.split(cutting_points, overlap=overlap)

                # Set track/issuer from new corpus
                for sub_utt in sub_utts:
                    sub_utt.track = result.tracks[utterance.track.idx]

                    if utterance.issuer is not None:
                        sub_utt.issuer = result.issuers[utterance.issuer.idx]

                result.import_utterances(sub_utts)

            # If utterance <= max_time, just copy
            else:
                new_utt = copy.deepcopy(utterance)

                # Point the copy at the track/issuer objects of the new corpus.
                new_utt.track = result.tracks[new_utt.track.idx]

                if new_utt.issuer is not None:
                    new_utt.issuer = result.issuers[new_utt.issuer.idx]

                result.import_utterances(new_utt)

        return result