# Source code for audiomate.corpus.io.audio_mnist

import os
import glob

import audiomate
from audiomate import annotations
from audiomate import issuers
from audiomate.utils import jsonfile

from . import base
from . import downloader

# Archive URL that AudioMNISTDownloader falls back to when it is created without a ``url``.
MASTER_DOWNLOAD_URL = 'https://github.com/soerenab/AudioMNIST/archive/master.zip'


class AudioMNISTDownloader(downloader.ArchiveDownloader):
    """
    Downloader for the audioMNIST dataset.

    Args:
        url (str): The url to download the dataset from. If not given the default URL is used.
                   It is expected to be a zip file.
    """

    def __init__(self, url=None):
        # Only a missing url (``None``) triggers the fallback; any other value
        # is handed to the base class unchanged.
        archive_url = MASTER_DOWNLOAD_URL if url is None else url

        super(AudioMNISTDownloader, self).__init__(
            archive_url,
            move_files_up=True
        )

    @classmethod
    def type(cls):
        """Return the identifier of this downloader, ``'audio-mnist'``."""
        return 'audio-mnist'


class AudioMNISTReader(base.CorpusReader):
    """
    Reader for the audioMNIST Corpus.

    The reader looks for a ``data`` folder below the corpus path. Every
    sub-folder of ``data`` is treated as one speaker and every ``*.wav`` file
    in it as one utterance. Speaker meta-data is read from
    ``data/audioMNIST_meta.txt``.

    .. seealso::

       `AudioMNIST-Dataset <https://github.com/soerenab/AudioMNIST>`_
          Download page
    """

    @classmethod
    def type(cls):
        """Return the identifier of this reader, ``'audio-mnist'``."""
        return 'audio-mnist'

    def _check_for_missing_files(self, path):
        """
        Collect the required paths that do not exist below ``path``.

        Args:
            path (str): Root folder of the corpus.

        Returns:
            list: Paths of the ``data`` folder and/or the meta-data file
            that are missing. Empty if both exist.
        """
        missing_files = []

        recordings_folder = os.path.join(path, 'data')
        if not os.path.isdir(recordings_folder):
            missing_files.append(recordings_folder)

        meta_file = os.path.join(recordings_folder, 'audioMNIST_meta.txt')
        if not os.path.isfile(meta_file):
            missing_files.append(meta_file)

        return missing_files

    def _load(self, path):
        """
        Build the corpus from the folder structure below ``path``.

        Args:
            path (str): Root folder of the corpus.

        Returns:
            audiomate.Corpus: Corpus with one file and one utterance per wav
            file, one speaker per speaker folder that contains wav files, and
            a word-transcript label list per utterance.
        """
        corpus = audiomate.Corpus(path=path)
        data_path = os.path.join(path, 'data')
        meta_data = AudioMNISTReader.load_speaker_meta(path)

        # os.listdir/glob return entries in arbitrary order; sorting makes the
        # corpus get built in the same order on every platform and run.
        for speaker_idx in sorted(os.listdir(data_path)):
            speaker_path = os.path.join(data_path, speaker_idx)

            # Skips plain files in the data folder (e.g. the meta-data file).
            if not os.path.isdir(speaker_path):
                continue

            wav_pattern = os.path.join(speaker_path, '*.wav')

            for file_path in sorted(glob.glob(wav_pattern)):
                file_idx = os.path.splitext(os.path.basename(file_path))[0]

                corpus.new_file(file_path, file_idx)

                # The part of the file name before the first underscore is
                # used as the transcript of the utterance.
                digit = file_idx.split('_')[0]

                # Register the speaker lazily, so folders without any wav
                # file do not produce an issuer.
                if speaker_idx not in corpus.issuers:
                    issuer = issuers.Speaker(
                        speaker_idx,
                        gender=AudioMNISTReader.get_gender(meta_data, speaker_idx),
                        age_group=AudioMNISTReader.get_age_group(meta_data, speaker_idx)
                    )
                    corpus.import_issuers(issuer)

                utterance = corpus.new_utterance(file_idx, file_idx, speaker_idx)
                utterance.set_label_list(annotations.LabelList.create_single(
                    digit,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT
                ))

        return corpus

    @staticmethod
    def load_speaker_meta(corpus_path):
        """
        Read the speaker meta-data file of the corpus.

        Args:
            corpus_path (str): Root folder of the corpus.

        Returns:
            The parsed content of ``data/audioMNIST_meta.txt``. The other
            methods of this class index it as
            ``meta_data[speaker_idx]['gender']`` / ``['age']``.
        """
        meta_file = os.path.join(corpus_path, 'data', 'audioMNIST_meta.txt')
        return jsonfile.read_json_file(meta_file)

    @staticmethod
    def get_gender(meta_data, speaker_idx):
        """
        Map the ``gender`` entry of a speaker to a ``issuers.Gender`` value.

        Args:
            meta_data (dict): Meta-data as returned by ``load_speaker_meta``.
            speaker_idx (str): Key of the speaker in ``meta_data``.

        Returns:
            issuers.Gender: ``MALE`` or ``FEMALE`` for the values ``male`` /
            ``female`` (ignoring case and surrounding whitespace),
            ``UNKNOWN`` for anything else.

        Raises:
            KeyError: If the speaker or its ``gender`` entry is missing.
        """
        gender_str = meta_data[speaker_idx]['gender']

        # Tolerate differences in capitalisation/whitespace; non-string values
        # are left untouched and end up as UNKNOWN.
        if isinstance(gender_str, str):
            gender_str = gender_str.strip().lower()

        if gender_str == 'male':
            return issuers.Gender.MALE

        if gender_str == 'female':
            return issuers.Gender.FEMALE

        return issuers.Gender.UNKNOWN

    @staticmethod
    def get_age_group(meta_data, speaker_idx):
        """
        Map the ``age`` entry of a speaker to a ``issuers.AgeGroup`` value.

        Args:
            meta_data (dict): Meta-data as returned by ``load_speaker_meta``.
            speaker_idx (str): Key of the speaker in ``meta_data``.

        Returns:
            issuers.AgeGroup: ``CHILD`` below 12, ``YOUTH`` below 18,
            ``ADULT`` below 65, ``SENIOR`` otherwise.

        Raises:
            KeyError: If the speaker or its ``age`` entry is missing.
            ValueError: If the ``age`` entry cannot be converted to ``int``.
        """
        age = int(meta_data[speaker_idx]['age'])

        if age < 12:
            return issuers.AgeGroup.CHILD

        if age < 18:
            return issuers.AgeGroup.YOUTH

        if age < 65:
            return issuers.AgeGroup.ADULT

        return issuers.AgeGroup.SENIOR