Source code for audiomate.corpus.conversion.wav

import functools
import multiprocessing

import soundfile as sf
import sox

from . import base
from audiomate import logutil

# Module-wide logger; used below for the conversion progress display and for
# reporting files that could not be converted.
logger = logutil.getLogger()


class WavAudioFileConverter(base.AudioFileConverter):
    """
    Class that creates a new instance of a corpus,
    so that all audio files meet given requirements.

    An audio file is treated as already matching the target format when
    ``soundfile`` reports it as a WAV file with ``PCM_16`` samples at the
    configured sampling rate. The conversion itself is delegated to
    ``_process_file``, which is run in a pool of worker processes.

    Args:
        num_workers (int): Number of worker processes used for converting
                           files in parallel.
        sampling_rate (int): Sampling rate of the converted audio.
                             Forwarded to the base class.
        separate_file_per_utterance (bool): Forwarded unchanged to the
                                            base class.
        force_conversion (bool): Forwarded unchanged to the base class.
    """

    def __init__(self, num_workers=4, sampling_rate=16000, separate_file_per_utterance=False,
                 force_conversion=False):
        super().__init__(
            sampling_rate,
            separate_file_per_utterance,
            force_conversion
        )

        self.num_workers = num_workers

        # Attributes of ``soundfile.info(...)`` that have to match for a file
        # to be considered as already being in the target format.
        # NOTE(review): ``self.sampling_rate`` is presumably set by the base
        # class constructor -- confirm.
        self.expected_properties = {
            'samplerate': self.sampling_rate,
            'format': 'WAV',
            'subtype': 'PCM_16'
        }

    def _file_extension(self):
        """ Return the file extension (without dot) of converted files. """
        return 'wav'

    def _does_utt_match_target_format(self, utterance):
        """
        Return ``True`` if the utterance already matches the target format,
        ``False`` otherwise.

        Files whose path ends with ``mp3`` are rejected without being opened.
        Every other file is inspected with ``soundfile.info``. A file that
        cannot be read (``RuntimeError``) counts as not matching.
        """
        path = utterance.track.path

        if path.endswith('mp3'):
            return False

        try:
            info = sf.info(path)
        except RuntimeError:
            return False

        return all(
            getattr(info, key) == value
            for key, value in self.expected_properties.items()
        )

    def _convert_files(self, files):
        """
        Convert all given files to the target format using a pool of
        ``num_workers`` processes.

        Args:
            files (iterable): Items of the form
                              ``(source-path, start, end, target-path)``,
                              as consumed by ``_process_file``.
        """
        # Materialize once, so the total for the progress display is known
        # even if a one-shot iterable (e.g. a generator) was passed in.
        files = list(files)

        convert = functools.partial(
            _process_file,
            target_sr=self.sampling_rate
        )

        with multiprocessing.Pool(self.num_workers) as pool:
            # ``imap`` is lazy. Consuming the wrapped iterator with ``list``
            # makes sure all files are processed before the pool is left.
            list(logger.progress(
                pool.imap(convert, files),
                total=len(files),
                description='Convert audio files'
            ))


def _process_file(file_item, target_sr):
    """
    Convert a single audio file, or a section of it, using sox.

    The output is written with the sampling rate ``target_sr``,
    one channel and a bit depth of 16.

    Args:
        file_item (sequence): ``(source-path, start, end, target-path)``.
                              An ``end`` of ``float('inf')`` means that the
                              audio is not cut at the end.
        target_sr (int): Sampling rate of the written file.

    If sox fails to convert the file, an error is logged and
    the exception is not propagated.
    """
    source_path, target_path = file_item[0], file_item[3]
    start, end = file_item[1], file_item[2]

    open_ended = end == float('inf')
    transformer = sox.Transformer()

    if not open_ended:
        # Explicit end given: cut out the range [start, end].
        transformer.trim(start, end)
    elif start > 0:
        # Only the beginning has to be dropped.
        transformer.trim(start)

    transformer.convert(target_sr, 1, 16)

    try:
        transformer.build(source_path, target_path)
    except sox.core.SoxError:
        logger.error('The following file could not be converted: %s', source_path)