Source code for audiomate.utils.textfile

"""
The textfile module contains functions
for reading and writing text files.
"""

import os

from audiomate.utils import text
from audiomate import logutil

logger = logutil.getLogger()


def read_separated_lines(path, separator=' ', max_columns=-1, keep_empty=False):
    """
    Reads a text file in which every line is a record made of separated columns
    and returns all records at once.

    Parameters:
        path (str): Path to the file to read.
        separator (str): Separator that is used to split the columns.
        max_columns (int): Maximum number of columns per record. Occurrences of
                           the separator beyond that stay in the last column.
        keep_empty (bool): If True empty columns are returned as well.

    Returns:
        list: A list containing one list of column values per line read.
    """
    # The generator does the actual parsing; here it is only materialised.
    records = read_separated_lines_generator(
        path,
        separator=separator,
        max_columns=max_columns,
        keep_empty=keep_empty,
    )
    return list(records)
def read_separated_lines_with_first_key(path: str, separator: str = ' ',
                                        max_columns: int = -1,
                                        keep_empty: bool = False):
    """
    Reads the separated lines of a file and returns them as a dictionary that is
    keyed by the first column. The value is a list with the remaining columns.

    If the same first column occurs on several lines, the last line wins.

    Parameters:
        path (str): Path to the file to read.
        separator (str): Separator that is used to split the columns.
        max_columns (int): Maximum number of columns per record. Occurrences of
                           the separator beyond that stay in the last column.
        keep_empty (bool): If True empty columns are returned as well.

    Returns:
        dict: Dictionary with the first column value as key and
        the list of the other column values as value.
    """
    records = read_separated_lines_generator(path, separator, max_columns,
                                             keep_empty=keep_empty)

    # Records without any column are skipped, since they provide no key.
    return {columns[0]: columns[1:] for columns in records if columns}
def read_key_value_lines(path, separator=' ', default_value=''):
    """
    Reads a two-column text file into a key/value dictionary.

    Every line is split at most once, so the separator may occur
    within the value. If a key occurs several times, the last line wins.

    Parameters:
        path (str): Path to the file.
        separator (str): Separator that is used to split key and value.
        default_value (str): Value that is used for lines that
                             only contain a key.

    Returns:
        dict: A dictionary with first column as key and second as value.
    """
    mapping = {}

    for columns in read_separated_lines_generator(path, separator, 2):
        if not columns:
            continue

        has_value = len(columns) > 1
        mapping[columns[0]] = columns[1] if has_value else default_value

    return mapping
def write_separated_lines(path, values, separator=' ', sort_by_column=0):
    """
    Writes a list or dict to a file, one record per line.

    For a dictionary every line consists of the key followed by the value.
    If the value is a list, tuple or set, its items are written as
    separate columns. For a list (or tuple/set) every record has to be an
    iterable of column values.

    The file is always created/truncated. If ``values`` is of any other type,
    nothing is written to it.

    Parameters:
        path (str): Path to write file to.
        values (dict, list): A dictionary or a list to write to the file.
        separator (str): Separator to use between columns.
        sort_by_column (int): For a list: if >= 0, the records are sorted by
                              the column with the given index (records that
                              don't have that column come first).
                              For a dictionary: 0 sorts by key, 1 sorts by
                              value, any other value keeps the order of the
                              dictionary.
                              By default 0, meaning sorted by the first column
                              or the key.
    """
    # isinstance (instead of an exact type comparison) so that subclasses
    # like OrderedDict/defaultdict are not silently ignored.
    collection_types = (list, tuple, set, frozenset)

    with open(path, 'w', encoding='utf-8') as f:
        if isinstance(values, dict):
            if sort_by_column in (0, 1):
                items = sorted(values.items(),
                               key=lambda item: item[sort_by_column])
            else:
                items = values.items()

            for key, value in items:
                if isinstance(value, collection_types):
                    value = separator.join(str(x) for x in value)

                f.write('{}{}{}\n'.format(key, separator, value))

        elif isinstance(values, collection_types):
            if sort_by_column >= 0:
                # Sort by the requested column. The one-element slice never
                # raises for records that are too short (it is empty then).
                # The full record is the tie-breaker, which keeps the result
                # for column 0 identical to a plain sort of the records.
                records = sorted(
                    values,
                    key=lambda record: (
                        record[sort_by_column:sort_by_column + 1],
                        record,
                    ),
                )
            else:
                records = values

            for record in records:
                columns = separator.join(str(value) for value in record)
                f.write('{}\n'.format(columns))
def read_separated_lines_generator(path, separator=' ', max_columns=-1,
                                   ignore_lines_starting_with=None, keep_empty=False):
    """
    Creates a generator that goes through all lines of a file
    and yields every line split into its columns.

    The file is read as UTF-8, undecodable bytes are ignored. Surrounding
    whitespace is removed from every column. Lines that are empty
    are skipped. If the path is not an existing file, an error is logged and
    nothing is yielded.

    Parameters:
        path (str): Path to the file.
        separator (str): Separator that is used to split the columns.
        max_columns (int): Maximum number of columns per record. Occurrences of
                           the separator beyond that stay in the last column.
                           Values below 1 mean no limit.
        ignore_lines_starting_with (list): Lines starting with a string in this
                                           list will be ignored.
        keep_empty (bool): If True empty columns are returned as well. For that
                           the line is not stripped before it is split, so
                           empty columns at the start/end of a line survive.

    Yields:
        list: The column values of the next line.
    """
    if not os.path.isfile(path):
        logger.error('File doesnt exist or is no file: %s', path)
        return

    # str.split takes the number of splits, which is one less than the columns.
    max_splits = max_columns - 1 if max_columns > -1 else -1

    with open(path, 'r', errors='ignore', encoding='utf-8') as handle:
        for raw_line in handle:
            content = raw_line if keep_empty else raw_line.strip()

            if ignore_lines_starting_with is not None:
                if text.starts_with_prefix_in_list(content, ignore_lines_starting_with):
                    continue

            if content == '':
                continue

            columns = content.split(sep=separator, maxsplit=max_splits)
            yield [column.strip() for column in columns]