Source code for audiomate.corpus.io.timit

import os
import glob

import audiomate
from audiomate.corpus import assets
from audiomate.corpus import subset
from . import base
from audiomate.utils import textfile


class TimitReader(base.CorpusReader):
    """
    Reader for the TIMIT Corpus.

    The reader walks ``<path>/<part>/<region>/<speaker>/`` for the parts
    ``TEST`` and ``TRAIN``. For every ``*.WAV`` file found it expects
    ``.TXT``, ``.WRD`` and ``.PHN`` files with the same base name in the
    same folder.

    .. seealso::

       `TIMIT <https://github.com/philipperemy/timit>`_
          Download page
    """

    # Divisor applied to the start/end columns of the ``.WRD``/``.PHN`` files
    # before they are stored on the labels.
    # NOTE(review): presumably these columns are sample offsets of 16 kHz
    # audio, which makes the resulting values seconds -- confirm.
    _SAMPLING_RATE = 16000

    # Top-level folders of the corpus; each one is imported as a subview
    # carrying the same name.
    _PARTS = ('TEST', 'TRAIN')

    @classmethod
    def type(cls):
        """Return the identifier of this reader, ``'timit'``."""
        return 'timit'

    def _check_for_missing_files(self, path):
        """
        Return the files missing below ``path``.

        No check is performed; the result is always an empty list.
        """
        return []

    def _load(self, path):
        """
        Build a corpus from the TIMIT folder structure at ``path``.

        Args:
            path (str): Root folder containing the ``TEST`` and ``TRAIN``
                        folders.

        Returns:
            audiomate.Corpus: The corpus with one file and one utterance per
            ``*.WAV`` file, one issuer per speaker folder and one subview
            per part.
        """
        corpus = audiomate.Corpus(path=path)

        for part in self._PARTS:
            part_path = os.path.join(path, part)
            part_utt_ids = set()

            for region in os.listdir(part_path):
                region_path = os.path.join(part_path, region)

                if not os.path.isdir(region_path):
                    continue

                for speaker_abbr in os.listdir(region_path):
                    speaker_path = os.path.join(region_path, speaker_abbr)

                    # Skip stray files so they are not registered as
                    # speakers without any utterance.
                    if not os.path.isdir(speaker_path):
                        continue

                    speaker_idx = self._import_speaker(corpus, speaker_abbr)
                    wav_pattern = os.path.join(speaker_path, '*.WAV')

                    for wav_path in glob.glob(wav_pattern):
                        utt_idx = self._load_utterance(
                            corpus, region, speaker_abbr, speaker_idx,
                            speaker_path, wav_path
                        )
                        part_utt_ids.add(utt_idx)

            utt_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=part_utt_ids
            )
            subview = subset.Subview(corpus, filter_criteria=[utt_filter])
            corpus.import_subview(part, subview)

        return corpus

    @staticmethod
    def _import_speaker(corpus, speaker_abbr):
        """
        Register the speaker of a speaker folder, unless already known.

        The first character of the folder name selects the gender (``M`` or
        ``F``, anything else leaves it unset); the remainder is the speaker
        id.

        Returns:
            str: The speaker id.
        """
        speaker_idx = speaker_abbr[1:]

        if speaker_idx not in corpus.issuers.keys():
            issuer = assets.Speaker(speaker_idx)
            gender_tag = speaker_abbr[:1]

            if gender_tag == 'M':
                issuer.gender = assets.Gender.MALE
            elif gender_tag == 'F':
                issuer.gender = assets.Gender.FEMALE

            corpus.import_issuers(issuer)

        return speaker_idx

    def _load_utterance(self, corpus, region, speaker_abbr, speaker_idx,
                        speaker_path, wav_path):
        """
        Add file, utterance and label lists for a single ``*.WAV`` file.

        Returns:
            str: The id of the created utterance, the lower-cased
            ``<region>-<speaker folder>-<sentence>``.
        """
        sentence_idx = os.path.splitext(os.path.basename(wav_path))[0]
        utt_idx = '{}-{}-{}'.format(region, speaker_abbr, sentence_idx).lower()

        # All annotations are read before the corpus is touched, so a
        # missing annotation file fails before file/utterance are created.
        raw_text_path = os.path.join(speaker_path, '{}.TXT'.format(sentence_idx))
        raw_text = self._read_records(raw_text_path)[0][2]

        words_path = os.path.join(speaker_path, '{}.WRD'.format(sentence_idx))
        words = self._read_records(words_path)

        phones_path = os.path.join(speaker_path, '{}.PHN'.format(sentence_idx))
        phones = self._read_records(phones_path)

        corpus.new_file(wav_path, utt_idx)
        utt = corpus.new_utterance(utt_idx, utt_idx, speaker_idx)

        utt.set_label_list(assets.LabelList(idx='raw_transcription', labels=[
            assets.Label(raw_text)
        ]))
        utt.set_label_list(self._timed_label_list('words', words))
        utt.set_label_list(self._timed_label_list('phones', phones))

        return utt_idx

    @staticmethod
    def _read_records(file_path):
        """Read a space-separated annotation file with at most 3 columns per line."""
        return textfile.read_separated_lines(file_path, separator=' ', max_columns=3)

    def _timed_label_list(self, idx, records):
        """
        Create a label list named ``idx`` from ``(start, end, value)`` records.

        Start and end are parsed as integers and divided by
        ``_SAMPLING_RATE``.
        """
        label_list = assets.LabelList(idx=idx)

        for record in records:
            start = int(record[0]) / self._SAMPLING_RATE
            end = int(record[1]) / self._SAMPLING_RATE
            label_list.append(assets.Label(record[2], start=start, end=end))

        return label_list