# Source code for audiomate.corpus.io.esc

import os
import collections
import zipfile
import shutil

import requests

import audiomate
from audiomate.corpus import assets
from audiomate.corpus import subset
from . import base
from audiomate.utils import textfile

# Archive URL used by ESC50Downloader when no custom URL is passed in.
DOWNLOAD_URL = 'https://github.com/karoldvl/ESC-50/archive/master.zip'
# Path of the metadata CSV, relative to the corpus root folder.
META_FILE_PATH = os.path.join('meta', 'esc50.csv')


class ESC50Downloader(base.CorpusDownloader):
    """
    Downloader for the ESC-50 dataset.

    Args:
        url (str): The url to download the dataset from. If not given the default URL is used.
    """

    def __init__(self, url=None):
        self.url = DOWNLOAD_URL if url is None else url

    @classmethod
    def type(cls):
        """ Return the identifier of this downloader (``'esc-50'``). """
        return 'esc-50'

    def _download(self, target_path):
        """
        Download the archive into ``target_path`` and unpack it there.

        The archive is stored temporarily as ``esc_50.zip`` inside ``target_path``,
        extracted, and the content of the extracted ``ESC-50-master`` folder is moved
        one level up so that it lies directly in ``target_path``.

        Args:
            target_path (str): Existing folder to store the dataset in.
        """
        temp_file = os.path.join(target_path, 'esc_50.zip')

        try:
            ESC50Downloader.download_file_chunked(self.url, temp_file)
            ESC50Downloader.extract_zip(temp_file, target_path)
        finally:
            # Never leave a (possibly partial or corrupt) archive behind,
            # whether or not download/extraction succeeded.
            if os.path.isfile(temp_file):
                os.remove(temp_file)

        root_folder = os.path.join(target_path, 'ESC-50-master')

        for element in os.listdir(root_folder):
            shutil.move(os.path.join(root_folder, element), target_path)

        shutil.rmtree(root_folder)

    @staticmethod
    def download_file_chunked(url, to):
        """
        Downloads the file from `url` to the local path `to`.

        Args:
            url (str): The url to download from.
            to (str): Path of the local file to write.

        Raises:
            requests.HTTPError: If the server answers with an error status code.
        """
        r = requests.get(url, stream=True)

        try:
            # Fail early instead of writing an HTTP error page to disk.
            r.raise_for_status()

            with open(to, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    # Empty chunks may be yielded; there is nothing to write for them.
                    if chunk:
                        f.write(chunk)
        finally:
            # A streamed response keeps its connection open until it is closed.
            r.close()

    @staticmethod
    def extract_zip(path, to):
        """ Extract all members of the zip archive at `path` into the folder `to`. """
        with zipfile.ZipFile(path) as archive:
            archive.extractall(to)


class ESC50Reader(base.CorpusReader):
    """
    Reader for the ESC-50 dataset (Environmental Sound Classification).

    .. seealso::

       `ESC-50 <https://github.com/karoldvl/ESC-50>`_
          Download page
    """

    @classmethod
    def type(cls):
        """ Return the identifier of this reader (``'esc-50'``). """
        return 'esc-50'

    def _check_for_missing_files(self, path):
        # No files are checked; an empty list is always returned.
        return []

    def _load(self, path):
        """
        Build a corpus from the ESC-50 folder at ``path``.

        For every metadata row a file and an utterance (both named after the
        file name without extension) are created, and the utterance gets a
        single label holding the category. Afterwards one subview per fold
        (``fold-<fold>``) and the ``esc-10`` subview are imported.
        """
        corpus = audiomate.Corpus(path=path)
        audio_dir = os.path.join(path, 'audio')

        rows = ESC50Reader.load_meta_data(path)

        utt_ids_by_fold = collections.defaultdict(list)
        esc10_ids = []

        for row in rows:
            # Columns used: 0 = file name, 1 = fold, 3 = category, 4 = esc10 flag.
            file_name, fold, category, in_esc10 = row[0], row[1], row[3], row[4]

            utt_id, _ = os.path.splitext(file_name)
            audio_path = os.path.abspath(os.path.join(audio_dir, file_name))

            corpus.new_file(audio_path, utt_id)
            utterance = corpus.new_utterance(utt_id, utt_id)

            label_list = assets.LabelList(labels=[assets.Label(category)])
            utterance.set_label_list(label_list)

            utt_ids_by_fold['fold-{}'.format(fold)].append(utt_id)

            # The flag is compared as text, exactly as it was read from the file.
            if in_esc10 == 'True':
                esc10_ids.append(utt_id)

        for fold_name, fold_ids in utt_ids_by_fold.items():
            ESC50Reader._import_utterance_subview(corpus, fold_name, fold_ids)

        ESC50Reader._import_utterance_subview(corpus, 'esc-10', esc10_ids)

        return corpus

    @staticmethod
    def _import_utterance_subview(corpus, name, utt_ids):
        """ Import a subview called `name` into `corpus` that matches the given utterance ids. """
        id_filter = subset.MatchingUtteranceIdxFilter(utterance_idxs=set(utt_ids))
        view = subset.Subview(corpus, filter_criteria=[id_filter])
        corpus.import_subview(name, view)

    @staticmethod
    def load_meta_data(path):
        """ Read the comma-separated metadata file below `path` and return its rows without the first (header) row. """
        meta_path = os.path.join(path, META_FILE_PATH)
        rows = textfile.read_separated_lines(meta_path, separator=',')
        return rows[1:]