
Commit

Merge branch 'master' of https://github.com/ddbourgin/numpy-ml into master
ddbourgin committed Jun 1, 2021
2 parents 84f65b9 + 7c210a6 commit 470763e
Showing 3 changed files with 168 additions and 18 deletions.
2 changes: 1 addition & 1 deletion numpy_ml/linear_models/README.md
@@ -1,7 +1,7 @@
# Linear Models
The `lm.py` module implements:

1. [OLS linear regression](https://en.wikipedia.org/wiki/Ordinary_least_squares) with maximum likelihood parameter estimates via the normal equation.
1. [OLS linear regression](https://en.wikipedia.org/wiki/Ordinary_least_squares) with maximum likelihood parameter estimates via the normal equation, supporting both batch fitting and online (recursive least-squares) updates (see the usage sketch below).
2. [Ridge regression / Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)
with maximum likelihood parameter estimates via the normal equation.
2. [Logistic regression](https://en.wikipedia.org/wiki/Logistic_regression) with maximum likelihood parameter estimates via gradient descent.
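For context, here is a minimal usage sketch of the batch-then-online workflow described above, based on the `LinearRegression` API added in `lm.py` below (illustrative only, not part of this commit):

```python
import numpy as np

from numpy_ml.linear_models.lm import LinearRegression

# toy data: 50 examples, 3 features, 1 target dimension
X, y = np.random.randn(50, 3), np.random.randn(50, 1)

lr = LinearRegression(fit_intercept=True)
lr.fit(X[:40], y[:40])             # batch fit via the normal equation

for x_new, y_new in zip(X[40:], y[40:]):
    lr.update(x_new, y_new)        # online (recursive least-squares) updates

y_hat = lr.predict(X)              # predictions from the updated coefficients
```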
126 changes: 109 additions & 17 deletions numpy_ml/linear_models/lm.py
@@ -11,23 +11,113 @@ def __init__(self, fit_intercept=True):
Notes
-----
Given data matrix *X* and target vector *y*, the maximum-likelihood estimate
for the regression coefficients, :math:`\\beta`, is:
Given data matrix **X** and target vector **y**, the maximum-likelihood
estimate for the regression coefficients, :math:`\beta`, is:
.. math::
\hat{\beta} =
\left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}
\hat{\beta} = \Sigma^{-1} \mathbf{X}^\top \mathbf{y}
where :math:`\Sigma^{-1} = (\mathbf{X}^\top \mathbf{X})^{-1}`.
Parameters
----------
fit_intercept : bool
Whether to fit an additional intercept term in addition to the
model coefficients. Default is True.
Whether to fit an intercept term in addition to the model
coefficients. Default is True.
"""
self.beta = None
self.sigma_inv = None
self.fit_intercept = fit_intercept

self._is_fit = False

def update(self, X, y):
r"""
Incrementally update the least-squares coefficients for a set of new
examples.
Notes
-----
The recursive least-squares algorithm [1]_ [2]_ is used to efficiently
update the regression parameters as new examples become available. For
a single new example :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the
parameter updates are
.. math::
\beta_{t+1} = \left( \mathbf{X}_{1:t}^\top \mathbf{X}_{1:t} +
\mathbf{x}_{t+1} \mathbf{x}_{t+1}^\top \right)^{-1}
\left( \mathbf{X}_{1:t}^\top \mathbf{Y}_{1:t} +
\mathbf{x}_{t+1}^\top \mathbf{y}_{t+1} \right)
where :math:`\beta_{t+1}` are the updated regression coefficients and
:math:`\mathbf{X}_{1:t}`, :math:`\mathbf{Y}_{1:t}` are the matrices of
examples and targets observed from timesteps 1 through *t*.
In the single-example case, the RLS algorithm uses the Sherman-Morrison
formula [3]_ to avoid re-inverting the covariance matrix on each new
update. In the multi-example case (i.e., where :math:`\mathbf{X}_{t+1}`
and :math:`\mathbf{y}_{t+1}` are matrices of `N` examples each), we use
the generalized Woodbury matrix identity [4]_ to update the inverse
covariance. This comes at a performance cost, but is still more
performant than doing multiple single-example updates if *N* is large.
References
----------
.. [1] Gauss, C. F. (1821). _Theoria combinationis observationum
erroribus minimis obnoxiae_. Werke, 4. Göttingen.
.. [2] https://en.wikipedia.org/wiki/Recursive_least_squares_filter
.. [3] https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
.. [4] https://en.wikipedia.org/wiki/Woodbury_matrix_identity
Parameters
----------
X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
A dataset consisting of `N` examples, each of dimension `M`
y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
The targets for each of the `N` examples in `X`, where each target
has dimension `K`
"""
if not self._is_fit:
raise RuntimeError("You must call the `fit` method before calling `update`")

X, y = np.atleast_2d(X), np.atleast_2d(y)

X1, Y1 = X.shape[0], y.shape[0]
self._update1D(X, y) if X1 == Y1 == 1 else self._update2D(X, y)

def _update1D(self, x, y):
"""Sherman-Morrison update for a single example"""
beta, S_inv = self.beta, self.sigma_inv

# convert x to a design vector if we're fitting an intercept
if self.fit_intercept:
x = np.c_[1, x]

# update the inverse of the covariance matrix via Sherman-Morrison
S_inv -= (S_inv @ x.T @ x @ S_inv) / (1 + x @ S_inv @ x.T)

# update the model coefficients
beta += S_inv @ x.T @ (y - x @ beta)
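As a quick numerical sanity check of the Sherman-Morrison step above, the rank-one update of the inverse agrees with direct re-inversion (a standalone NumPy sketch, not part of this commit):

```python
import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(4, 4)
S = A @ A.T + np.eye(4)            # well-conditioned stand-in for X^T X
S_inv = np.linalg.inv(S)
x = rng.randn(1, 4)                # one new example as a row vector

# Sherman-Morrison rank-one update of the inverse
S_inv_sm = S_inv - (S_inv @ x.T @ x @ S_inv) / (1 + x @ S_inv @ x.T)

# compare against inverting the updated matrix from scratch
assert np.allclose(S_inv_sm, np.linalg.inv(S + x.T @ x))
```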

def _update2D(self, X, y):
"""Woodbury update for multiple examples"""
beta, S_inv = self.beta, self.sigma_inv

# convert X to a design matrix if we're fitting an intercept
if self.fit_intercept:
X = np.c_[np.ones(X.shape[0]), X]

I = np.eye(X.shape[0])

# update the inverse of the covariance matrix via Woodbury identity
S_inv -= S_inv @ X.T @ np.linalg.pinv(I + X @ S_inv @ X.T) @ X @ S_inv

# update the model coefficients
beta += S_inv @ X.T @ (y - X @ beta)
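Likewise, a short check of the Woodbury step above for a block of new examples (standalone NumPy sketch, not part of this commit):

```python
import numpy as np

rng = np.random.RandomState(1)
A = rng.randn(4, 4)
S = A @ A.T + np.eye(4)            # stand-in for the current X^T X
S_inv = np.linalg.inv(S)
X_new = rng.randn(3, 4)            # a block of N = 3 new examples

I_N = np.eye(X_new.shape[0])
# Woodbury update of the inverse for a rank-N correction
S_inv_wb = S_inv - S_inv @ X_new.T @ np.linalg.inv(I_N + X_new @ S_inv @ X_new.T) @ X_new @ S_inv

# compare against inverting the updated matrix from scratch
assert np.allclose(S_inv_wb, np.linalg.inv(S + X_new.T @ X_new))
```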

def fit(self, X, y):
"""
Fit the regression coefficients via maximum likelihood.
@@ -44,8 +134,10 @@ def fit(self, X, y):
if self.fit_intercept:
X = np.c_[np.ones(X.shape[0]), X]

pseudo_inverse = np.linalg.inv(X.T @ X) @ X.T
self.beta = np.dot(pseudo_inverse, y)
self.sigma_inv = np.linalg.pinv(X.T @ X)
self.beta = np.atleast_2d(self.sigma_inv @ X.T @ y)

self._is_fit = True
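For reference, a short sketch (not part of this commit) showing that the normal-equation estimate computed in `fit` matches NumPy's least-squares solver:

```python
import numpy as np

rng = np.random.RandomState(2)
X = np.c_[np.ones(20), rng.randn(20, 3)]   # design matrix with an intercept column
y = rng.randn(20, 2)

beta_normal = np.linalg.pinv(X.T @ X) @ X.T @ y        # normal equation, as in `fit`
beta_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)     # NumPy's least-squares solver

assert np.allclose(beta_normal, beta_lstsq)
```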

def predict(self, X):
"""
@@ -166,22 +258,22 @@ def __init__(self, penalty="l2", gamma=0, fit_intercept=True):
\left(
\sum_{i=0}^N y_i \log(\hat{y}_i) +
(1-y_i) \log(1-\hat{y}_i)
\right) - R(\mathbf{b}, \gamma)
\right]
where
.. math::
R(\mathbf{b}, \gamma) = \left\{
\begin{array}{lr}
\frac{\gamma}{2} ||\mathbf{b}||_2^2 & :\texttt{ penalty = 'l2'}\\
\gamma ||\mathbf{b}||_1 & :\texttt{ penalty = 'l1'}
\end{array}
\right.
is a regularization penalty, :math:`\gamma` is a regularization weight,
`N` is the number of examples in **y**, and **b** is the vector of model
coefficients.
Parameters
@@ -251,10 +343,10 @@ def _NLL(self, X, y, y_pred):
\right]
"""
N, M = X.shape
beta, gamma = self.beta, self.gamma
order = 2 if self.penalty == "l2" else 1
norm_beta = np.linalg.norm(beta, ord=order)

nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
penalty = (gamma / 2) * norm_beta ** 2 if order == 2 else gamma * norm_beta
return (penalty + nll) / N
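To make the penalized objective concrete, a tiny worked example of the computation in `_NLL` (standalone sketch with made-up values, assuming an `'l2'` penalty; not part of this commit):

```python
import numpy as np

y = np.array([1, 0, 1, 1])                   # binary targets
y_pred = np.array([0.9, 0.2, 0.7, 0.6])      # predicted probabilities
beta, gamma = np.array([0.5, -1.0]), 0.1     # coefficients and penalty weight

nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
penalty = (gamma / 2) * np.linalg.norm(beta, ord=2) ** 2
print((penalty + nll) / y.shape[0])          # per-example penalized NLL (≈ 0.31 here)
```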
58 changes: 58 additions & 0 deletions numpy_ml/tests/test_linear_regression.py
@@ -0,0 +1,58 @@
# flake8: noqa
import numpy as np

from sklearn.linear_model import LinearRegression as LinearRegressionGold

from numpy_ml.linear_models.lm import LinearRegression
from numpy_ml.utils.testing import random_tensor


def test_linear_regression(N=10):
np.random.seed(12345)
N = np.inf if N is None else N

i = 1
while i < N + 1:
train_samples = np.random.randint(2, 30)
update_samples = np.random.randint(1, 30)
n_samples = train_samples + update_samples

# ensure n_feats < train_samples, otherwise multiple solutions are
# possible
n_feats = np.random.randint(1, train_samples)
target_dim = np.random.randint(1, 10)

fit_intercept = np.random.choice([True, False])

X = random_tensor((n_samples, n_feats), standardize=True)
y = random_tensor((n_samples, target_dim), standardize=True)

X_train, X_update = X[:train_samples], X[train_samples:]
y_train, y_update = y[:train_samples], y[train_samples:]

# Fit gold standard model on the entire dataset
lr_gold = LinearRegressionGold(fit_intercept=fit_intercept, normalize=False)
lr_gold.fit(X, y)

# Fit our model on just (X_train, y_train)...
lr = LinearRegression(fit_intercept=fit_intercept)
lr.fit(X_train, y_train)

do_single_sample_update = np.random.choice([True, False])

# ...then update our model on the examples (X_update, y_update)
if do_single_sample_update:
for x_new, y_new in zip(X_update, y_update):
lr.update(x_new, y_new)
else:
lr.update(X_update, y_update)

# check that model predictions match
np.testing.assert_almost_equal(lr.predict(X), lr_gold.predict(X), decimal=5)

# check that model coefficients match
beta = lr.beta.T[:, 1:] if fit_intercept else lr.beta.T
np.testing.assert_almost_equal(beta, lr_gold.coef_, decimal=6)

print("\tPASSED")
i += 1
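The new test can also be exercised directly (assuming `scikit-learn` is installed for the reference model):

```python
from numpy_ml.tests.test_linear_regression import test_linear_regression

test_linear_regression(N=5)   # compare against scikit-learn on 5 random problems
```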
