import abc
import copy
import os
import audiomate
from audiomate import tracks
from audiomate import logutil
# Module-level logger; ``convert`` wraps its utterance loop with
# ``logger.progress`` (presumably to report progress -- confirm in logutil).
logger = logutil.getLogger()
class AudioFileConverter(metaclass=abc.ABCMeta):
    """
    Base class for converters that convert all audio to a specific format.

    A converter creates a new instance of a corpus,
    so that all audio files meet given requirements.

    Subclasses have to implement :meth:`_file_extension`,
    :meth:`_does_utt_match_target_format` and :meth:`_convert_files`.

    Args:
        sampling_rate (int): Target sampling rate to convert audio to.
        separate_file_per_utterance (bool): If ``True``, every utterance in the
                                            resulting corpus is in a separate file.
                                            If ``False``, the file/utt structure will
                                            be preserved.
        force_conversion (bool): If ``True``, all utterances will be converted
                                 whether or not they already match the target
                                 format. If ``False``, only utterances not
                                 matching the target format will be converted.
                                 All others reference the original files.
    """

    def __init__(self, sampling_rate=16000, separate_file_per_utterance=False,
                 force_conversion=False):
        self.sampling_rate = sampling_rate
        self.separate_file_per_utterance = separate_file_per_utterance
        self.force_conversion = force_conversion

    def convert(self, corpus, target_audio_path):
        """
        Convert the given corpus.

        The conversion jobs are collected while iterating over the utterances
        and handed to :meth:`_convert_files` in a single call at the end.

        Args:
            corpus (Corpus): The input corpus.
            target_audio_path (str): The path where the audio files of the
                                     converted corpus should be saved.

        Returns:
            Corpus: The newly created corpus.
        """
        out_corpus = audiomate.Corpus()

        # Tuples of (source path, start, end, target path).
        files_to_convert = []

        for utterance in logger.progress(
                corpus.utterances.values(),
                total=corpus.num_utterances,
                description='Find utterances to convert'):

            # An utterance does not necessarily have an issuer,
            # so only dereference it when it is present.
            issuer = utterance.issuer
            issuer_idx = None

            if issuer is not None:
                issuer_idx = issuer.idx

                if issuer_idx not in out_corpus.issuers:
                    out_corpus.import_issuers(issuer)

            if not self._does_utt_need_conversion(utterance):
                # Just copy everything to the output corpus
                self._copy_utterance_to_corpus(utterance, out_corpus)
                continue

            # Store audio in a new file
            if self.separate_file_per_utterance:
                # One target file per utterance, named after the utterance.
                filename = '{}.{}'.format(utterance.idx, self._file_extension())
                path = os.path.join(target_audio_path, filename)
                files_to_convert.append((
                    utterance.track.path,
                    utterance.start,
                    utterance.end,
                    path
                ))

                # The new file only holds this utterance,
                # hence the utterance spans the whole new track.
                track = out_corpus.new_file(path, utterance.idx)
                start = 0
                end = float('inf')
            else:
                # One target file per source track. The conversion is only
                # scheduled the first time the track is encountered.
                if utterance.track.idx not in out_corpus.tracks:
                    filename = '{}.{}'.format(utterance.track.idx, self._file_extension())
                    path = os.path.join(target_audio_path, filename)
                    files_to_convert.append((
                        utterance.track.path,
                        0,
                        float('inf'),
                        path
                    ))
                    out_corpus.new_file(path, utterance.track.idx)

                # The file/utt structure is preserved, so the utterance keeps
                # its original position within the track.
                track = utterance.track
                start = utterance.start
                end = utterance.end

            utt = out_corpus.new_utterance(
                utterance.idx,
                track.idx,
                issuer_idx=issuer_idx,
                start=start,
                end=end
            )

            # Deep copy, so the label-lists of the input corpus stay untouched.
            lls = copy.deepcopy(list(utterance.label_lists.values()))
            utt.set_label_list(lls)

        self._copy_subviews_to_corpus(corpus, out_corpus)
        self._convert_files(files_to_convert)

        return out_corpus

    @abc.abstractmethod
    def _file_extension(self):
        """ Return the file-extension (without leading dot) that will be used. """
        raise NotImplementedError()

    @abc.abstractmethod
    def _does_utt_match_target_format(self, utterance):
        """
        Return ``True`` if the utterance already matches the target format,
        ``False`` otherwise.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def _convert_files(self, files):
        """
        Convert the given files and store them with the target format.

        Args:
            files (list): List of tuples
                          ``(source_path, start, end, target_path)``.
                          ``start``/``end`` are taken from the utterance, or
                          are ``0``/``float('inf')`` when a whole track
                          has to be converted.
        """
        raise NotImplementedError()

    def _does_utt_need_conversion(self, utterance):
        """ Return True if an utterance needs to be converted. """
        if self.force_conversion:
            return True

        # Anything that is not a file-track is always converted.
        if not isinstance(utterance.track, tracks.FileTrack):
            return True

        # A separate file per utterance is requested,
        # but the utterance covers only a part of its track.
        covers_full_track = utterance.start <= 0 and utterance.end == float('inf')

        if self.separate_file_per_utterance and not covers_full_track:
            return True

        return not self._does_utt_match_target_format(utterance)

    def _copy_utterance_to_corpus(self, utterance, corpus):
        """
        Add the utterance to the given corpus.
        The track of the utterance is imported first, if the corpus has
        no track with the same idx yet.
        """
        if utterance.track.idx not in corpus.tracks:
            corpus.import_tracks(utterance.track)

        corpus.import_utterances(utterance)

    def _copy_subviews_to_corpus(self, from_corpus, to_corpus):
        """ Create copy of all subviews from ``from_corpus`` in ``to_corpus``. """
        subviews = copy.deepcopy(from_corpus.subviews)

        for subview_idx, subview in subviews.items():
            to_corpus.import_subview(subview_idx, subview)