Source code for audiomate.corpus.io.speech_commands

import glob
import os

import audiomate
from audiomate.corpus import assets
from audiomate.corpus.subset import subview
from audiomate.utils import textfile
from . import base


class SpeechCommandsReader(base.CorpusReader):
    """
    Reads the google speech commands dataset.

    Every sub-folder of the corpus path whose name does not start with ``_``
    is treated as one command (e.g. ``bed``, ``one``) and all ``*.wav`` files
    directly inside it are loaded. The files ``testing_list.txt`` and
    ``validation_list.txt`` in the corpus path are used to create the
    subviews ``train``, ``dev`` and ``test``.

    .. seealso::

       `Launching Speech Commands DS <https://research.googleblog.com/2017/08/launching-speech-commands-dataset.html>`_
          Blog-Entry on the release of the speech commands dataset.
    """

    @classmethod
    def type(cls):
        """ Return the identifier of this reader (``'speech-commands'``). """
        return 'speech-commands'

    def _check_for_missing_files(self, path):
        """
        Return the list of files missing in ``path``.

        This reader performs no up-front check and always returns an empty list.
        """
        return []

    def _load(self, path):
        """
        Build the corpus from the folder at ``path``.

        Args:
            path (str): Root folder of the speech commands dataset.

        Returns:
            audiomate.Corpus: The corpus with files, issuers, utterances,
            labels and the subviews ``train``, ``dev`` and ``test``.
        """
        corpus = audiomate.Corpus(path=path)

        for folder in os.scandir(path):
            # Folders starting with '_' are skipped; they are not loaded as commands.
            if folder.is_dir() and not folder.name.startswith('_'):
                SpeechCommandsReader._load_folder(folder, corpus)

        # Subviews reference utterance ids, so they are created after loading.
        SpeechCommandsReader._create_subviews(path, corpus)

        return corpus

    @staticmethod
    def _utterance_idx(command, wav_name):
        """
        Return the file/utterance id ``'<wav basename>_<command>'``.

        Loading and subview creation both have to produce exactly the same
        ids, hence the single definition of the format.

        Args:
            command (str): Name of the command folder (e.g. ``bed``).
            wav_name (str): File name of the recording, including extension.
        """
        return '{}_{}'.format(os.path.splitext(wav_name)[0], command)

    @staticmethod
    def _read_utterance_idxs(list_path):
        """
        Read a ``<command>/<wav file>`` list file and return the set of
        utterance ids it refers to.
        """
        records = textfile.read_separated_lines(list_path, separator='/', max_columns=2)
        # record[0] is the command folder, record[1] the wav file name.
        return {SpeechCommandsReader._utterance_idx(record[0], record[1]) for record in records}

    @staticmethod
    def _load_folder(folder_entry, corpus):
        """ Load the given subfolder into the corpus (e.g. bed, one, ...) """
        command = folder_entry.name

        for wav_path in glob.glob(os.path.join(folder_entry.path, '*.wav')):
            wav_name = os.path.basename(wav_path)
            basename = os.path.splitext(wav_name)[0]

            # The same id is used for the file and for its single utterance.
            file_idx = SpeechCommandsReader._utterance_idx(command, wav_name)
            # Everything before the first '_' of the file name identifies the issuer.
            issuer_idx = str(basename).split('_', maxsplit=1)[0]

            corpus.new_file(wav_path, file_idx)

            # An issuer can have several recordings; register it only once.
            if issuer_idx not in corpus.issuers:
                corpus.new_issuer(issuer_idx)

            utt = corpus.new_utterance(file_idx, file_idx, issuer_idx)

            # The command (folder name) is the only label of the utterance.
            labels = assets.LabelList(labels=[assets.Label(command)])
            utt.set_label_list(labels)

    @staticmethod
    def _create_subviews(path, corpus):
        """ Load the subviews based on testing_list.txt and validation_list.txt """
        test_set = SpeechCommandsReader._read_utterance_idxs(os.path.join(path, 'testing_list.txt'))
        dev_set = SpeechCommandsReader._read_utterance_idxs(os.path.join(path, 'validation_list.txt'))

        # There is no list for training: the train view is defined with an
        # inverted filter over the union of the test and dev ids.
        inv_train_set = test_set.union(dev_set)

        train_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=inv_train_set, inverse=True)
        train_view = subview.Subview(corpus, filter_criteria=train_filter)
        corpus.import_subview('train', train_view)

        dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=dev_set, inverse=False)
        dev_view = subview.Subview(corpus, filter_criteria=dev_filter)
        corpus.import_subview('dev', dev_view)

        test_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=test_set, inverse=False)
        test_view = subview.Subview(corpus, filter_criteria=test_filter)
        corpus.import_subview('test', test_view)