Source code for audiomate.corpus.io.audio_mnist

import os
import glob

import audiomate
from audiomate import annotations
from audiomate import issuers
from audiomate.utils import jsonfile

from . import base
from . import downloader

# Zip archive of the master branch of the AudioMNIST repository.
# AudioMNISTDownloader falls back to this URL when no url is passed to it.
MASTER_DOWNLOAD_URL = 'https://github.com/soerenab/AudioMNIST/archive/master.zip'


class AudioMNISTDownloader(downloader.ArchiveDownloader):
    """
    Downloader for the audioMNIST dataset.

    Args:
        url (str): Location of the zip archive to download.
                   When omitted (``None``), ``MASTER_DOWNLOAD_URL`` is used.
    """

    def __init__(self, url=None):
        # Fall back to the module-level default only when no URL was given.
        archive_url = MASTER_DOWNLOAD_URL if url is None else url

        super(AudioMNISTDownloader, self).__init__(
            archive_url,
            move_files_up=True
        )

    @classmethod
    def type(cls):
        """ Return the identifier of this downloader. """
        return 'audio-mnist'
class AudioMNISTReader(base.CorpusReader):
    """
    Reader for the audioMNIST Corpus.

    The reader expects a ``data`` folder below the corpus path. Every
    sub-folder of ``data`` is treated as one speaker and is searched for
    ``*.wav`` files. The speaker meta-data is read from
    ``data/audioMNIST_meta.txt``.

    .. seealso::

       `AudioMNIST-Dataset <https://github.com/soerenab/AudioMNIST>`_
          Download page
    """

    @classmethod
    def type(cls):
        """ Return the identifier of this reader. """
        return 'audio-mnist'

    def _check_for_missing_files(self, path):
        """
        Collect the required paths that do not exist below ``path``.

        Args:
            path (str): Root folder of the corpus.

        Returns:
            list: Paths of the missing ``data`` folder and/or meta-data
            file. Empty if both are present.
        """
        missing_files = []

        recordings_folder = os.path.join(path, 'data')

        if not os.path.isdir(recordings_folder):
            missing_files.append(recordings_folder)

        meta_file = os.path.join(path, 'data', 'audioMNIST_meta.txt')

        if not os.path.isfile(meta_file):
            missing_files.append(meta_file)

        return missing_files

    def _load(self, path):
        """
        Build a corpus from the recordings found below ``path``.

        For every wav-file one file and one utterance are created, both
        using the file name without extension as id. The part of the file
        name before the first ``_`` is stored as word-transcript label.
        One speaker is created per speaker folder that contains at least
        one wav-file.

        Args:
            path (str): Root folder of the corpus.

        Returns:
            audiomate.Corpus: The loaded corpus.
        """
        corpus = audiomate.Corpus(path=path)
        data_path = os.path.join(path, 'data')
        meta_data = AudioMNISTReader.load_speaker_meta(path)

        for speaker_idx in os.listdir(data_path):
            speaker_path = os.path.join(data_path, speaker_idx)

            # Anything that is not a folder (e.g. the meta-data file)
            # does not represent a speaker.
            if not os.path.isdir(speaker_path):
                continue

            # Only the file-name part may act as a wildcard. Without
            # escaping, a corpus path containing "[", "]", "*" or "?"
            # would be interpreted as a pattern and match no files.
            wav_pattern = os.path.join(glob.escape(speaker_path), '*.wav')

            for file_path in glob.glob(wav_pattern):
                file_idx = os.path.splitext(os.path.basename(file_path))[0]
                corpus.new_file(file_path, file_idx)

                digit = file_idx.split('_')[0]

                # The speaker is registered lazily with its first file,
                # so folders without recordings add no issuer.
                if speaker_idx not in corpus.issuers:
                    issuer = issuers.Speaker(
                        speaker_idx,
                        gender=AudioMNISTReader.get_gender(meta_data, speaker_idx),
                        age_group=AudioMNISTReader.get_age_group(meta_data, speaker_idx)
                    )
                    corpus.import_issuers(issuer)

                utterance = corpus.new_utterance(file_idx, file_idx, speaker_idx)
                utterance.set_label_list(annotations.LabelList.create_single(
                    digit,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT
                ))

        return corpus

    @staticmethod
    def load_speaker_meta(corpus_path):
        """
        Read the speaker meta-data file of the corpus.

        Args:
            corpus_path (str): Root folder of the corpus.

        Returns:
            The content of ``data/audioMNIST_meta.txt`` as returned by
            ``jsonfile.read_json_file`` (presumably a dict keyed by
            speaker-id; verify against the dataset).
        """
        meta_file = os.path.join(corpus_path, 'data', 'audioMNIST_meta.txt')
        return jsonfile.read_json_file(meta_file)

    @staticmethod
    def get_gender(meta_data, speaker_idx):
        """
        Map the ``gender`` entry of a speaker to a ``issuers.Gender``.

        Args:
            meta_data: Speaker meta-data, indexable by speaker-id.
            speaker_idx (str): Id of the speaker to look up.

        Returns:
            issuers.Gender: ``MALE`` for ``'male'``, ``FEMALE`` for
            ``'female'``, ``UNKNOWN`` for any other value.
        """
        gender_str = meta_data[speaker_idx]['gender']

        if gender_str == 'male':
            return issuers.Gender.MALE

        if gender_str == 'female':
            return issuers.Gender.FEMALE

        return issuers.Gender.UNKNOWN

    @staticmethod
    def get_age_group(meta_data, speaker_idx):
        """
        Map the ``age`` entry of a speaker to a ``issuers.AgeGroup``.

        Args:
            meta_data: Speaker meta-data, indexable by speaker-id.
            speaker_idx (str): Id of the speaker to look up.

        Returns:
            issuers.AgeGroup: ``CHILD`` below 12, ``YOUTH`` below 18,
            ``ADULT`` below 65, ``SENIOR`` otherwise.
        """
        # The value is converted with int(), so a numeric string works too.
        age = int(meta_data[speaker_idx]['age'])

        if age < 12:
            return issuers.AgeGroup.CHILD

        if age < 18:
            return issuers.AgeGroup.YOUTH

        if age < 65:
            return issuers.AgeGroup.ADULT

        return issuers.AgeGroup.SENIOR