"""
The textfile module contains functions
for reading and writing textfiles.
"""
import os
from audiomate.utils import text
from audiomate import logutil
# Module-level logger shared by the functions in this module; obtained from
# audiomate's own logutil helper.
logger = logutil.getLogger()
def read_separated_lines(path, separator=' ', max_columns=-1, keep_empty=False):
    """
    Read a text file in which every line is a record of separated columns.

    This is the eager counterpart of ``read_separated_lines_generator``: the
    generator is consumed completely and its records are returned as a list.

    Parameters:
        path (str): Path to the file to read.
        separator (str): Separator that is used to split the columns.
        max_columns (int): Maximum number of columns per record (useful if the
                           separator may occur within the last column).
        keep_empty (bool): Forwarded to the generator; if True empty columns
                           are returned as well.

    Returns:
        list: A list containing one list of column values per line read.
    """
    return list(
        read_separated_lines_generator(path, separator, max_columns, keep_empty=keep_empty)
    )
def read_separated_lines_with_first_key(path: str, separator: str = ' ', max_columns: int = -1,
                                        keep_empty: bool = False) -> dict:
    """
    Read the separated lines of a file into a dictionary keyed by the first column.

    Every record's first column becomes the key and the remaining columns
    (possibly none) become the value list. If the same key occurs on several
    lines, the last occurrence wins.

    Parameters:
        path (str): Path to the file to read.
        separator (str): Separator that is used to split the columns.
        max_columns (int): Maximum number of columns per record (useful if the
                           separator may occur within the last column).
        keep_empty (bool): If True empty columns are returned as well.

    Returns:
        dict: Maps the first column value of each record to the list of the
        record's remaining column values.
    """
    records = read_separated_lines_generator(path, separator, max_columns, keep_empty=keep_empty)

    # Records without any column carry no key and are skipped.
    return {record[0]: record[1:] for record in records if record}
def read_key_value_lines(path, separator=' ', default_value=''):
    """
    Read the lines of a two-column text file as a key/value dictionary.

    Each line is split at most once, so everything after the first separator
    belongs to the value. If the same key occurs on several lines, the last
    occurrence wins.

    Parameters:
        path (str): Path to the file.
        separator (str): Separator that is used to split key and value.
        default_value (str): Value used for lines that contain only a key.

    Returns:
        dict: A dictionary with the first column as key and the second as value.
    """
    dic = {}

    # max_columns=2 -> at most one split per line: [key] or [key, value].
    for record in read_separated_lines_generator(path, separator, 2):
        if not record:
            continue

        dic[record[0]] = record[1] if len(record) > 1 else default_value

    return dic
def write_separated_lines(path, values, separator=' ', sort_by_column=0):
    """
    Write a list/set of records or a dictionary to a file, one entry per line.

    For a dictionary every line is ``key<separator>value``; a value that is a
    list or set is written with its elements joined by the separator. For a
    list or set every element is treated as a record (an iterable of columns)
    whose columns are joined by the separator. All items are converted with
    ``str``. The file is written as UTF-8 and is created/truncated even when
    ``values`` is of an unsupported type, in which case it stays empty.

    Parameters:
        path (str): Path to write file to.
        values (dict, list, set): A dictionary or a list/set of records to
                                  write to the file.
        separator (str): Separator to use between columns.
        sort_by_column (int): For a list/set: if >= 0, records are sorted by
                              the column with this index (records lacking that
                              column come first, ties are ordered by the whole
                              record); if negative the given order is kept.
                              For a dictionary: 0 sorts by key, 1 sorts by
                              value, anything else keeps the dictionary order.
                              By default 0, meaning sorted by the first column
                              or the key.
    """
    def column_key(record):
        # Slicing instead of indexing tolerates records that are too short to
        # have the requested column. The whole record is the tie-breaker, which
        # keeps the output deterministic (also for sets) and, for column 0,
        # yields exactly the order of a plain ``sorted(values)``.
        return tuple(record[sort_by_column:sort_by_column + 1]), record

    with open(path, 'w', encoding='utf-8') as f:
        # isinstance (not an exact type check) so that dict/list/set
        # subclasses such as OrderedDict are written as well.
        if isinstance(values, dict):
            if sort_by_column in (0, 1):
                items = sorted(values.items(), key=lambda item: item[sort_by_column])
            else:
                items = values.items()

            for key, value in items:
                if isinstance(value, (list, set)):
                    value = separator.join(str(x) for x in value)

                f.write('{}{}{}\n'.format(key, separator, value))

        elif isinstance(values, (list, set)):
            if sort_by_column >= 0:
                items = sorted(values, key=column_key)
            else:
                items = values

            for record in items:
                f.write('{}\n'.format(separator.join(str(value) for value in record)))
def read_separated_lines_generator(path, separator=' ', max_columns=-1,
                                   ignore_lines_starting_with=None, keep_empty=False):
    """
    Create a generator over all lines of a file that yields each line split into columns.

    The file is read as UTF-8 and undecodable bytes are ignored. Every yielded
    column is stripped of surrounding whitespace. If ``path`` is not an
    existing file, an error is logged and nothing is yielded.

    Parameters:
        path (str): Path to the file.
        separator (str): Separator that is used to split the columns.
        max_columns (int): Maximum number of columns per record (useful if the
                           separator may occur within the last column). Values
                           below 1 impose no limit.
        ignore_lines_starting_with (list): Lines starting with a string in this
                                           list will be ignored.
        keep_empty (bool): If True the line is not stripped before it is split,
                           so empty columns at the start/end of a line are
                           returned as well (and a blank line yields a record
                           with one empty column). Empty columns in the middle
                           of a line are returned either way.

    Yields:
        list: The column values of one line.
    """
    if not os.path.isfile(path):
        logger.error('File doesnt exist or is no file: %s', path)
        return

    # N columns need N - 1 splits; str.split treats maxsplit=-1 as "no limit".
    max_splits = max_columns - 1 if max_columns > -1 else -1

    with open(path, 'r', errors='ignore', encoding='utf-8') as f:
        for line in f:
            stripped_line = line if keep_empty else line.strip()

            if ignore_lines_starting_with is not None and text.starts_with_prefix_in_list(
                    stripped_line, ignore_lines_starting_with):
                continue

            if stripped_line == '':
                continue

            columns = stripped_line.split(sep=separator, maxsplit=max_splits)
            yield [field.strip() for field in columns]