# BSD 3-Clause License.
#
# Copyright (c) 2019-2024 Robert A. Milton. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
""" **Functionality for processing results generated by ``romcomma``** """
from __future__ import annotations
from romcomma.base.definitions import *
from romcomma.base.classes import Data
from romcomma.data.storage import Repository, Fold
from shutil import rmtree
[docs]
def copy(src: Path | str, dst: Path | str) -> Path:
""" Copy a folder destructively.
Args:
src: The folder to be copied, relative to the ``Fold.folder``.
dst: The folder of the copy, relative to the ``Fold.folder``.
Returns: dst if successful.
"""
Data.copy(src, dst)
return dst
[docs]
class Collect:
""" A device for collecting -- i.e. concatenating -- csv files across folders or folds."""
csvs: Dict[str, Dict[str, Any]] = {} #: Key = csv name (minus extension). Value = a Dict of options (kwargs) passed to ``pd.read_csv``.
folders: Dict[str, Dict[str, Any]] = {} #: Key = folder containing csvs. Value = An (ordered) Dict of {Column name: Column value} to insert from R to L.
ignore_missing: bool = False #: Whether to raise an exception when a csv is missing from a folder.
write_options: Dict[str, Any] = {'index': False, 'float_format': '%.6f'} #: kwargs passed straight to ``pd.to_csv``.
def __call__(self, dst: Union[Repository, Path, str], is_existing_deleted=False, **kwargs: Any):
""" Collect ``self.csvs`` into ``dst``. If and only if ``dst`` is a Repository, ``self.over_folds`` is called instead of ``self.over_folders``.
Args:
dst: The destination folder, to house ``self.csvs`` or ``self.folders``.
is_existing_deleted: Whether to delete and recreate an existing ``dst``.
**kwargs: Write options passed straight to ``pd.to_csv``.
"""
if isinstance(dst, Repository):
return self.from_folds(dst, is_existing_deleted, **kwargs)
else:
return self.from_folders(dst, is_existing_deleted, **kwargs)
[docs]
def from_folders(self, dst: Union[Path, str], is_existing_deleted=False, **kwargs: Any) -> Collect:
""" Collect ``dst/[self.csvs]`` from ``self.folders``.
Args:
dst: The destination folder, to house ``[self.csvs]``.
is_existing_deleted: Whether to delete and recreate an existing ``dst``.
**kwargs: Write options passed straight to ``pd.to_csv``.
Returns: ``self'' for chaining calls.
"""
dst = Path(dst)
if is_existing_deleted:
rmtree(dst, ignore_errors=True)
dst.mkdir(mode=0o777, parents=True, exist_ok=True)
for csv, read_options in self.csvs.items():
is_initial = True
results = None
for folder, columns in self.folders.items():
file = Path(folder) / f'{csv}.csv'
if file.exists() or not self.ignore_missing:
result = pd.read_csv(file, **read_options)
for key, value in columns.items():
result.insert(0, key, np.full(result.shape[0], value), True)
if is_initial:
results = result.copy(deep=True)
is_initial = False
else:
results = pd.concat([results, result.copy(deep=True)], axis=0, ignore_index=True)
if not (results is None and self.ignore_missing):
results.to_csv(dst / f'{csv}.csv', **(self.write_options | kwargs))
return self
[docs]
def from_folds(self, dst: Repository, is_existing_deleted=False, **kwargs: Any) -> Collect:
""" Collect ``dst/[self.folders]`` from ``Fold(dst, [k])/[self.folders]`` for ``k in self.Folds``.
Args:
dst: The destination folder, to house ``[self.folders]``.
is_existing_deleted: Whether to delete and recreate an existing ``dst``.
**kwargs: Write options passed straight to ``pd.to_csv``.
Returns: ``self'' for chaining calls.
"""
if isinstance(dst, Fold):
raise NotADirectoryError('dst is a Fold, which cannot contain other Folds, so cannot be Collected from.')
folds = tuple((Fold(dst, k) for k in dst.folds))
for sub_folder, extra_columns in self.folders.items():
folders = {fold.folder / sub_folder: {'fold': fold.meta['k'], 'N': fold.N} | extra_columns for fold in folds}
Collect(self.csvs, folders, self.ignore_missing).from_folders(dst.folder / sub_folder, is_existing_deleted, **kwargs)
return self
[docs]
def __init__(self, csvs: Dict[str, Dict[str, Any]] = None, folders: Dict[str, Dict[str, Any]] = None, ignore_missing: bool = False, **kwargs: Any):
""" Construct a Collect object.
Args:
csvs: Key = csv name (minus extension). Value = a Dict of options (kwargs) passed to ``pd.read_csv``.
folders: Key = folder containing csvs. Value = An (ordered) Dict of {Column name: Column value} to insert from R to L.
ignore_missing: Whether to raise an exception when a csv is missing from a folder.
**kwargs: kwargs passed straight to ``pd.to_csv``.
"""
self.csvs = self.csvs if csvs is None else csvs
self.folders = self.folders if folders is None else folders
self.ignore_missing = ignore_missing
self.write_options.update(kwargs)