# BSD 3-Clause License.
#
# Copyright (c) 2019-2024 Robert A. Milton. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
""" Base classes for romcomma Models."""
from __future__ import annotations
import pandas as pd
from romcomma.base.definitions import *
import shutil
import json
from abc import ABC
[docs]
class Frame:
""" Encapsulates a pandas DataFrame backed by a source file."""
csv: Path #: The csv file path, without ``.csv``.
@property
def df(self) -> pd.DataFrame:
return self._df
@property
def np(self) -> NP.Matrix:
return self._df.values
@np.setter
def np(self, value: NP.Matrix):
self._df.iloc[:, :] = value
self.write()
@property
def tf(self) -> TF.Matrix:
return tf.convert_to_tensor(self.np)
@tf.setter
def tf(self, value: TF.Matrix):
self._df.iloc[:, :] = value.numpy()
self.write()
[docs]
def write(self, **kwargs: Any) -> Frame:
""" Write to csv. This is called whenever the data in the Frame changes.
Args:
**kwargs: Options passed straight to ``self.to_csv()``.
Returns: ``self``, for call chaining.
"""
self._write_options = self._write_options | kwargs
self._df.to_csv(self.csv.with_suffix(f'{self.csv.suffix}.csv'), **self._write_options)
return self
[docs]
def broadcast_value(self, target_shape: Tuple[int, int], is_diagonal: bool = True) -> Frame:
""" Broadcast a frame
Args:
target_shape: The shape to broadcast to.
is_diagonal: Whether to zero the off-diagonal elements of a square matrix.
Returns: Self, for chaining calls.
Raises:
IndexError: If broadcasting is impossible.
"""
try:
values = np.array(np.broadcast_to(self.np, target_shape))
except ValueError:
raise IndexError(f'{repr(self)} has shape {self.df.shape} 'f' which cannot be broadcast to {target_shape}.')
if is_diagonal and target_shape[0] > 1:
values = np.diag(np.diagonal(values))
self._df = pd.DataFrame(values)
return self.write()
def __call__(self, *args, **kwargs):
""" Returns ``self.np``, as this is automatically cast by tf, np and pd."""
return self.np
def __repr__(self) -> str:
return str(self.csv)
def __str__(self) -> str:
return self.csv.name
# noinspection PyDefaultArgument
[docs]
def __init__(self, csv: Path | str, data: pd.DataFrame | NP.Array | Iterable | Dict = None, index: pd.Index | NP.ArrayLike = None,
columns: pd.Index | NP.ArrayLike = None, dtype: np.dtype | None = None, copy: bool | None = None, **kwargs):
""" Construct a Frame, from csv or pd.DataFrame. If ``data is None``, the Frame is read from csv. Otherwise the Frame is written to csv.
Args:
csv: The csv file path, without ``.csv``.
data: The data to store. If None, a pd.DataFrame is read from csv.
See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
index: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
columns: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
dtype: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
copy: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
**kwargs: Passed straight to `pd.read_csv <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html>`_
or `DataFrame.to_csv <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html>`_.
"""
self.csv = Path(csv)
self._write_options = {}
if data is None:
self._df = (pd.read_csv(self.csv.with_suffix(f'{self.csv.suffix}.csv'), **({'index_col': 0} | kwargs)))
else:
self._df = pd.DataFrame(data, index, columns, dtype, copy)
self.write(**kwargs)
# noinspection PyProtectedMember
[docs]
class Data(ABC):
""" Abstraction of Model Data. Essentially a NamedTuple of Frames in a folder.
Most Data methods are simple wrappers for annoyingly underscored methods of `NamedTuple
<https://docs.python.org/3/library/collections.html#collections.namedtuple>`_."""
Matrix: Type = Frame | pd.DataFrame | NP.Matrix | TF.Matrix
[docs]
class NamedTuple(NamedTuple):
""" A NamedTuple of data. Must be overridden."""
NotImplemented: Data.Matrix = np.atleast_2d('NotImplemented') #: NamedTuple can have any number of members.
@classmethod
def make(cls, iterable: Iterable) -> NamedTuple:
return cls.NamedTuple._make(iterable)
@classmethod
@property
def fields(cls) -> Tuple[str, ...]:
return cls.NamedTuple._fields
@classmethod
@property
def field_defaults(cls) -> Dict[str, Any]:
return cls.NamedTuple._field_defaults
def asdict(self) -> Dict[str, Any]:
return self._frames._asdict()
def replace(self, **kwargs: Data.Matrix) -> Data:
for key, value in kwargs.items():
value = value.numpy() if isinstance(value, TF.Tensor) else value
kwargs[key] = value if isinstance(value, Frame) else Frame(self._folder / key, np.atleast_2d(value))
self._frames = self.NamedTuple(**kwargs) if self._frames is None else self._frames._replace(**kwargs)
return self
@property
def folder(self) -> Path:
return self._folder
@property
def frames(self) -> NamedTuple:
return self._frames
[docs]
def move(self, dst_folder: Path | str) -> Data:
""" Move ``self`` to ``dst_folder``.
Args:
dst_folder: The folder to move to. If this exists, it will be emptied.
Returns: ``self`` for chaining calls.
"""
self._folder = Data(self.empty(dst_folder), **self.asdict()).folder
return self
def __call__(self, *args, **kwargs):
""" Returns ``self.values``."""
return self._frames
def __repr__(self) -> str:
return str(self._folder)
def __str__(self) -> str:
return self._folder.name
[docs]
def __init__(self, folder: Path | str, **kwargs: Data.Matrix):
""" Data Constructor.
Args:
folder: The folder to record the data. Must exist.
**kwargs: Initial pairs of NamedTuple fields, precisely as in ``NamedTuple(**kwargs)``.
Missing fields receive their defaults, so ``Data(folder)`` is the default parameter set.
"""
self._folder = folder if folder.exists() else self.empty(folder)
kwargs = self.NamedTuple(**kwargs)._asdict()
self._frames = None
self.replace(**kwargs)
[docs]
@classmethod
def read(cls, folder: Path | str, **kwargs: Data.Matrix) -> Data:
""" Read ``Data`` from ``folder``.
Args:
folder: The folder to record the data. Must exist
**kwargs: key=ordinate initial pairs of NamedTuple fields, precisely as in NamedTuple(**kwargs).
Missing fields receive their defaults, so ``Data(folder)`` is the default ``Data``.
Returns: The ``Data`` stored in ``folder``.
"""
folder = Path(folder)
asdict = {field: Frame(folder / field, kwargs.get(field, None)) for field in cls.fields}
return cls(folder, **asdict)
[docs]
@staticmethod
def delete(folder: Path | str) -> Path:
""" Returns a non-existent ``folder``."""
folder = Path(folder)
shutil.rmtree(folder, ignore_errors=True)
return folder
[docs]
@staticmethod
def empty(folder: Path | str) -> Path:
""" Returns an empty ``folder``."""
folder = Data.delete(folder)
folder.mkdir(mode=0o777, parents=True, exist_ok=False)
return folder
[docs]
@staticmethod
def copy(src_folder: Path | str, dst_folder: Path | str) -> Path:
""" Returns a copy of ``src_folder`` at dst_folder, deleting anything existing at the destination."""
dst_folder = Data.delete(dst_folder)
shutil.copytree(src=src_folder, dst=dst_folder)
return dst_folder
[docs]
class Model(ABC):
""" Abstract base class for any model. This base class implements generic file storage and parameter handling.
The latter is dealt with by each subclass overriding ``Data.NamedTuple`` with its own ``NamedTuple[NamedTuple]``
defining the parameter set it takes. ``model.data.values`` is a ``Model.Data.NamedTuple`` of NP.Matrices.
A Model also may include a calibrate method taking meta stored in an meta.json file, which default to cls.META.
"""
[docs]
class Data(Data):
""" This is a placeholder which must be overridden in any implementation."""
[docs]
class NamedTuple(NamedTuple):
""" A NamedTuple of data. Must be overridden."""
NotImplemented: Data.Matrix = np.atleast_2d('NotImplemented') #: NamedTuple can have any number of members.
@classmethod
@property
def META(cls) -> Dict[str, Any]:
"""Returns: Default meta data."""
pass
# raise NotImplementedError
@property
def folder(self) -> Path:
return self._folder
@property
def data(self) -> Data:
return self._data
@data.setter
def data(self, value: Data):
self._data = value
@abstractmethod
def calibrate(self, method: str, **kwargs) -> Dict[str, Any]:
if method != 'I know I told you never to call me, but I have relented because I just cannot live without you sweet-cheeks.':
raise NotImplementedError('base.calibrate() must never be called.')
else:
meta = self.META | kwargs
meta = (meta if meta is not None
else self.read_meta() if self._meta_json.exists() else self.META)
meta.pop('result', default=None)
meta = {**meta, 'result': 'OPTIMIZE HERE !!!'}
self.write_meta(meta)
self.data = self._data.replace('WITH OPTIMAL PARAMETERS!!!').write(self.folder) # Remember to write optimization results.
return meta
def read_meta(self) -> Dict[str, Any]:
# noinspection PyTypeChecker
with open(self._meta_json, mode='r') as file:
return json.load(file)
def write_meta(self, meta: Dict[str, Any]):
# noinspection PyTypeChecker
with open(self._meta_json, mode='w') as file:
json.dump(meta, file, indent=8)
def __repr__(self) -> str:
""" Returns the folder path."""
return str(self._folder)
def __str__(self) -> str:
""" Returns the folder name."""
return self._folder.name
[docs]
@abstractmethod
def __init__(self, folder: Path | str, read_data: bool = False, **kwargs: NP.Matrix):
""" Model constructor, to be called by all subclasses as a matter of priority.
Args:
folder: The model file location.
read_data: If True, the ``model.data`` are read from ``folder``, otherwise defaults are used.
**kwargs: The model.data fields=values to replace after reading from file/defaults.
"""
self._folder = Path(folder)
self._meta_json = self._folder / "meta.json"
if read_data:
self._data = self.Data.read(self._folder).replace(**kwargs)
else:
self._folder.mkdir(mode=0o777, parents=True, exist_ok=True)
self._data = self.Data(self._folder, **kwargs)
self._implementation = None