# BSD 3-Clause License.
#
# Copyright (c) 2019-2024 Robert A. Milton. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
""" **Functionality for linear regression** """
from __future__ import annotations
import numpy as np
from romcomma.base.definitions import *
import scipy.stats
from romcomma.data.storage import Frame, Repository, Fold
from romcomma.user import functions
import shutil
import sys
[docs]
def gls(X: NP.Matrix, y: NP.Matrix, cov_y: NP.Matrix, is_through_origin: bool = False) -> Tuple[TF.Vector, TF.Matrix]:
""" `Generalized Least Squares <https://en.wikipedia.org/wiki/Generalized_least_squares>`_ linear regression.
Args:
X: An (N,M) matrix of regression variables
y: An (N,1) vector of observations.
cov_y: The (N,N) covariance matrix of observations ``y``.
is_through_origin: True to constrain to ``y(0)=0``
Returns: A pair consisting of the (M+1,1) -- or (M,1) if ``is_through_origin`` -- regression coefficients and their covariance matrix,
where the intercept is the first regression coefficient -- or absent if ``is_through_origin``.
"""
if not is_through_origin:
X = tf.pad(X, ((0, 0), (0, 1)), mode='constant', constant_values=1.0)
cov_cho = tf.linalg.cholesky(cov_y)
precision_cho_X = tf.linalg.triangular_solve(cov_cho, X)
precision_cho_y = tf.linalg.triangular_solve(cov_cho, y)
cov_beta = tf.linalg.cholesky(tf.einsum('NM, Nm -> Mm', precision_cho_X, precision_cho_X))
cov_beta = tf.linalg.triangular_solve(cov_beta, tf.eye(X.shape[-1]))
cov_beta = tf.einsum('NM, Nm -> Mm', cov_beta, cov_beta)
beta = tf.einsum('Mm, NM, Nl -> ml', cov_beta, precision_cho_X, precision_cho_y)
return beta, cov_beta