import os
import audiomate
from audiomate import annotations
from audiomate import issuers
from audiomate.corpus import subset
from audiomate.utils import download
from audiomate.utils import textfile
from . import base
DOWNLOAD_URLS = {
'de_DE': 'http://www.m-ailabs.bayern/?ddownload=410',
'en_UK': 'http://www.m-ailabs.bayern/?ddownload=412',
'en_US': 'http://www.m-ailabs.bayern/?ddownload=411',
'es_ES': 'http://www.m-ailabs.bayern/?ddownload=413',
'it_IT': 'http://www.m-ailabs.bayern/?ddownload=409',
'uk_UK': 'http://www.m-ailabs.bayern/?ddownload=414',
'ru_RU': 'http://www.m-ailabs.bayern/?ddownload=415',
'fr_FR': 'http://www.m-ailabs.bayern/?ddownload=681',
'pl_PL': 'http://www.m-ailabs.bayern/?ddownload=673',
}
[docs]class MailabsDownloader(base.CorpusDownloader):
"""
Downloader for the M-AILABS Speech Dataset.
Args:
tags (list): List of tags for different parts to download.
Corresponds to the tags in the
`Statistics & Download Links` on the webpage.
If ``None``, all parts are downloaded.
"""
def __init__(self, tags=None):
self.tags = tags
[docs] @classmethod
def type(cls):
return 'mailabs'
def _download(self, target_path):
os.makedirs(target_path, exist_ok=True)
for tag, download_url in DOWNLOAD_URLS.items():
if self.tags is None or tag in self.tags:
tmp_file = os.path.join(target_path, 'tmp_{}.tgz'.format(tag))
download.download_file(download_url, tmp_file)
download.extract_tar(tmp_file, target_path)
os.remove(tmp_file)
[docs]class MailabsReader(base.CorpusReader):
"""
Reader for the M-AILABS Speech Dataset.
.. seealso::
`M-AILABS Speech Dataset <http://www.m-ailabs.bayern/en/the-mailabs-speech-dataset/>`_
Project Page
"""
[docs] @classmethod
def type(cls):
return 'mailabs'
def _check_for_missing_files(self, path):
return []
def _load(self, path):
corpus = audiomate.Corpus(path=path)
tag_folders = MailabsReader.get_folders(path)
for tag_folder in tag_folders:
MailabsReader.load_tag(corpus, tag_folder)
return corpus
[docs] @staticmethod
def get_folders(path):
"""
Return a list of all subfolder-paths in the given path.
"""
folder_paths = []
for item in os.listdir(path):
folder_path = os.path.join(path, item)
if os.path.isdir(folder_path):
folder_paths.append(folder_path)
return folder_paths
[docs] @staticmethod
def load_tag(corpus, path):
"""
Iterate over all speakers on load them.
Collect all utterance-idx and create a subset of them.
"""
tag_idx = os.path.basename(path)
data_path = os.path.join(path, 'by_book')
tag_utt_ids = []
for gender_path in MailabsReader.get_folders(data_path):
# IN MIX FOLDERS THERE ARE NO SPEAKERS
# HANDLE EVERY UTT AS DIFFERENT ISSUER
if os.path.basename(gender_path) == 'mix':
utt_ids = MailabsReader.load_books_of_speaker(corpus,
gender_path,
None)
tag_utt_ids.extend(utt_ids)
else:
for speaker_path in MailabsReader.get_folders(gender_path):
speaker = MailabsReader.load_speaker(corpus, speaker_path)
utt_ids = MailabsReader.load_books_of_speaker(corpus,
speaker_path,
speaker)
tag_utt_ids.extend(utt_ids)
filter = subset.MatchingUtteranceIdxFilter(
utterance_idxs=set(tag_utt_ids)
)
subview = subset.Subview(corpus, filter_criteria=[filter])
corpus.import_subview(tag_idx, subview)
[docs] @staticmethod
def load_speaker(corpus, path):
"""
Create a speaker instance for the given path.
"""
base_path, speaker_name = os.path.split(path)
base_path, gender_desc = os.path.split(base_path)
base_path, __ = os.path.split(base_path)
base_path, tag = os.path.split(base_path)
gender = issuers.Gender.UNKNOWN
if gender_desc == 'male':
gender = issuers.Gender.MALE
elif gender_desc == 'female':
gender = issuers.Gender.FEMALE
speaker = issuers.Speaker(speaker_name, gender=gender)
corpus.import_issuers(speaker)
return speaker
[docs] @staticmethod
def load_books_of_speaker(corpus, path, speaker):
"""
Load all utterances for the speaker at the given path.
"""
utt_ids = []
for book_path in MailabsReader.get_folders(path):
meta_path = os.path.join(book_path, 'metadata.csv')
wavs_path = os.path.join(book_path, 'wavs')
meta = textfile.read_separated_lines(meta_path,
separator='|',
max_columns=3)
for entry in meta:
file_basename = entry[0]
transcription_raw = entry[1]
transcription_clean = entry[2]
if speaker is None:
idx = file_basename
utt_speaker = issuers.Speaker(idx)
speaker_idx = idx
corpus.import_issuers(utt_speaker)
else:
idx = '{}-{}'.format(speaker.idx, file_basename)
speaker_idx = speaker.idx
wav_name = '{}.wav'.format(file_basename)
wav_path = os.path.join(wavs_path, wav_name)
if os.path.isfile(wav_path):
corpus.new_file(wav_path, idx)
ll_raw = annotations.LabelList.create_single(
transcription_raw,
idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW
)
ll_clean = annotations.LabelList.create_single(
transcription_clean,
idx=audiomate.corpus.LL_WORD_TRANSCRIPT
)
utterance = corpus.new_utterance(idx, idx, speaker_idx)
utterance.set_label_list(ll_raw)
utterance.set_label_list(ll_clean)
utt_ids.append(utterance.idx)
return utt_ids