Source code for audiomate.annotations.relabeling

from audiomate import annotations
from audiomate.utils import textfile

# Key of the special catch-all projection. When this key is present in a
# projections dictionary, `relabel` uses its value for every label combination
# that has no explicit entry, and `find_missing_projections` reports nothing
# as missing.
WILDCARD_COMBINATION = ('**',)


class UnmappedLabelsException(Exception):
    """
    Signals that at least one combination of labels has no projection.

    Args:
        message (str): Description of the problem; it is passed on to
                       :py:class:`Exception` and also kept on the instance.

    Attributes:
        message (str): The message given to the constructor.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message
def relabel(label_list, projections):
    """
    Build a new label list by applying user-defined projections to
    `label_list`.

    Projections make it possible to rename labels, to remove them, or to
    flatten overlapping labels into a single label per segment.

    Every entry of `projections` maps a combination of labels (key) to one
    new label (value). The key is a tuple of the sorted label values that
    are active at the same time within a segment. A combination that is
    mapped to the empty string is dropped from the result. If the wildcard
    key ``('**',)`` is present, its value is used for every combination
    without an explicit entry, so not every combination has to be listed.

    Args:
        label_list (audiomate.annotations.LabelList): The label list to
                                                      relabel.
        projections (dict): Maps tuples of label combinations to string
                            labels.

    Returns:
        audiomate.annotations.LabelList: New label list (same ``idx`` as
        `label_list`) holding the remapped labels.

    Raises:
        UnmappedLabelsException: If no projection is defined for one or
                                 more combinations of labels.

    Example:
        >>> projections = {
        ...     ('a',): 'a',
        ...     ('b',): 'b',
        ...     ('c',): 'c',
        ...     ('a', 'b',): 'a_b',
        ...     ('a', 'b', 'c',): 'a_b_c',
        ...     ('**',): 'b_c',
        ... }
        >>> label_list = annotations.LabelList(labels=[
        ...     annotations.Label('a', 3.2, 4.5),
        ...     annotations.Label('b', 4.0, 4.9),
        ...     annotations.Label('c', 4.2, 5.1)
        ... ])
        >>> ll = relabel(label_list, projections)
        >>> [l.value for l in ll]
        ['a', 'a_b', 'a_b_c', 'b_c', 'c']
    """
    # Fail early, before any label is created, if something is not covered.
    missing = find_missing_projections(label_list, projections)

    if missing:
        raise UnmappedLabelsException('Unmapped combinations: {}'.format(missing))

    relabeled = []

    for segment in label_list.ranges():
        start, end, active_labels = segment[0], segment[1], segment[2]
        key = tuple(sorted(label.value for label in active_labels))

        # Explicit projections win; anything else falls back to the wildcard.
        if key in projections:
            target = projections[key]
        else:
            target = projections[WILDCARD_COMBINATION]

        # An empty target means "drop this segment".
        if target != '':
            relabeled.append(annotations.Label(target, start, end))

    return annotations.LabelList(idx=label_list.idx, labels=relabeled)
def find_missing_projections(label_list, projections):
    """
    Determine which label combinations occurring in `label_list` have no
    entry in `projections`.

    Every entry of `projections` maps a combination of labels (key) to one
    new label (value). The key is a tuple of the sorted label values that
    are active at the same time within a segment. If the wildcard key
    ``('**',)`` is present, every combination counts as covered and an
    empty list is returned.

    Args:
        label_list (audiomate.annotations.LabelList): The label list to
                                                      check.
        projections (dict): Maps tuples of label combinations to string
                            labels.

    Returns:
        list: Sorted list of the label combinations (tuples of sorted label
        values) that are not covered by any projection; empty if there are
        none.

    Example:
        >>> ll = annotations.LabelList(labels=[
        ...     annotations.Label('b', 3.2, 4.5),
        ...     annotations.Label('a', 4.0, 4.9),
        ...     annotations.Label('c', 4.2, 5.1)
        ... ])
        >>> find_missing_projections(ll, {('b',): 'new_label'})
        [('a', 'b'), ('a', 'b', 'c'), ('a', 'c'), ('c',)]
    """
    # The wildcard covers everything, so there is nothing to look for.
    if WILDCARD_COMBINATION in projections:
        return []

    occurring = (
        tuple(sorted(label.value for label in segment[2]))
        for segment in label_list.ranges()
    )
    uncovered = {combo for combo in occurring if combo not in projections}

    return sorted(uncovered)
def load_projections(projections_file):
    """
    Loads projections defined in the given `projections_file`.

    The `projections_file` is expected to be in the following format::

        old_label_1 | new_label_1
        old_label_1 old_label_2 | new_label_2
        old_label_3 |

    You can define one projection per line. Each projection starts with a
    list of one or multiple old labels (separated by whitespace) that are
    separated from the new label by a pipe (`|`).

    In the code above, the segment labeled with `old_label_1` will be
    labeled with `new_label_1` after applying the projection. Segments that
    are labeled with `old_label_1` **and** `old_label_2` concurrently are
    relabeled to `new_label_2`. All segments labeled with `old_label_3` are
    dropped. Combinations of multiple labels are automatically sorted.

    If the same combination is defined more than once, the last definition
    in the file wins.

    Args:
        projections_file (str): Path to the file with projections

    Returns:
        dict: Dictionary where the keys are tuples of labels to project to
        the key's value

    Example:
        >>> load_projections('/path/to/projections.txt')
        {('b',): 'foo', ('a', 'b'): 'a_b', ('a',): 'bar'}
    """
    projections = {}

    for parts in textfile.read_separated_lines_generator(projections_file, '|'):
        # Split on any run of whitespace. Splitting on exactly one space
        # would turn consecutive spaces or tabs between old labels into
        # empty or malformed labels, producing a key that never matches a
        # real label combination.
        combination = tuple(sorted(parts[0].split()))
        new_label = parts[1].strip()

        projections[combination] = new_label

    return projections