From 7c210a62f4782ece0e81223a523f324f936c539b Mon Sep 17 00:00:00 2001
From: David Bourgin
Date: Tue, 1 Jun 2021 12:28:30 -0400
Subject: [PATCH] Multi-sample online least-squares (#74)

* added support for online linear regression

* added test

* Addressed reviewer's concern

* Refactor update method, add documentation

* Expand linear regression unit test

* Allow multi-sample updates from the update method

* Update tests for linear regression

Co-authored-by: Kenneth Odoh
---
 numpy_ml/linear_models/lm.py             | 61 +++++++++++++++++-------
 numpy_ml/tests/test_linear_regression.py | 17 +++++--
 2 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/numpy_ml/linear_models/lm.py b/numpy_ml/linear_models/lm.py
index 45141f2..9f63d50 100644
--- a/numpy_ml/linear_models/lm.py
+++ b/numpy_ml/linear_models/lm.py
@@ -32,16 +32,17 @@ def __init__(self, fit_intercept=True):
 
         self._is_fit = False
 
-    def update(self, x, y):
+    def update(self, X, y):
         r"""
-        Incrementally update the least-squares coefficients on a new example
-        via recursive least-squares (RLS) [1]_ .
+        Incrementally update the least-squares coefficients for a set of new
+        examples.
 
         Notes
         -----
-        The RLS algorithm [2]_ is used to efficiently update the regression
-        parameters as new examples become available. For a new example
-        :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the parameter updates are
+        The recursive least-squares algorithm [1]_ [2]_ is used to efficiently
+        update the regression parameters as new examples become available. For
+        a single new example :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the
+        parameter updates are
 
         .. math::
 
@@ -55,9 +56,13 @@ def update(self, x, y):
         :math:`\mathbf{X}_{1:t}` and :math:`\mathbf{Y}_{1:t}` are the set of
         examples observed from timestep 1 to *t*.
 
-        To perform the above update efficiently, the RLS algorithm makes use of
-        the Sherman-Morrison formula [3]_ to avoid re-inverting the covariance
-        matrix on each new update.
+        In the single-example case, the RLS algorithm uses the Sherman-Morrison
+        formula [3]_ to avoid re-inverting the covariance matrix on each new
+        update. In the multi-example case (i.e., where :math:`\mathbf{X}_{t+1}`
+        and :math:`\mathbf{y}_{t+1}` are matrices of `N` examples each), we use
+        the generalized Woodbury matrix identity [4]_ to update the inverse
+        covariance. A block update costs more than a rank-one update, but is
+        still cheaper than *N* separate single-example updates when *N* is large.
 
         References
         ----------
@@ -65,23 +70,27 @@ def update(self, x, y):
         .. [1] Gauss, C. F. (1821) _Theoria combinationis observationum erroribus minimis obnoxiae_, Werke, 4. Gottinge
         .. [2] https://en.wikipedia.org/wiki/Recursive_least_squares_filter
         .. [3] https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
+        .. [4] https://en.wikipedia.org/wiki/Woodbury_matrix_identity
 
         Parameters
         ----------
-        x : :py:class:`ndarray <numpy.ndarray>` of shape `(1, M)`
-            A single example of rank `M`
-        y : :py:class:`ndarray <numpy.ndarray>` of shape `(1, K)`
-            A `K`-dimensional target vector for the current example
+        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
+            A dataset consisting of `N` examples, each of dimension `M`
+        y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
+            The targets for each of the `N` examples in `X`, where each target
+            has dimension `K`
         """
         if not self._is_fit:
             raise RuntimeError("You must call the `fit` method before calling `update`")
 
-        x, y = np.atleast_2d(x), np.atleast_2d(y)
-        beta, S_inv = self.beta, self.sigma_inv
+        X, y = np.atleast_2d(X), np.atleast_2d(y)
 
-        X1, Y1 = x.shape[0], y.shape[0]
-        err_str = f"First dimension of x and y must be 1, but got {X1} and {Y1}"
-        assert X1 == Y1 == 1, err_str
+        X1, Y1 = X.shape[0], y.shape[0]
+        self._update1D(X, y) if X1 == Y1 == 1 else self._update2D(X, y)
+
+    def _update1D(self, x, y):
+        """Sherman-Morrison update for a single example"""
+        beta, S_inv = self.beta, self.sigma_inv
 
         # convert x to a design vector if we're fitting an intercept
         if self.fit_intercept:
@@ -93,6 +102,22 @@ def update(self, x, y):
         # update the model coefficients
         beta += S_inv @ x.T @ (y - x @ beta)
 
+    def _update2D(self, X, y):
+        """Woodbury update for multiple examples"""
+        beta, S_inv = self.beta, self.sigma_inv
+
+        # convert X to a design matrix if we're fitting an intercept
+        if self.fit_intercept:
+            X = np.c_[np.ones(X.shape[0]), X]
+
+        I = np.eye(X.shape[0])
+
+        # update the inverse of the covariance matrix via Woodbury identity
+        S_inv -= S_inv @ X.T @ np.linalg.pinv(I + X @ S_inv @ X.T) @ X @ S_inv
+
+        # update the model coefficients
+        beta += S_inv @ X.T @ (y - X @ beta)
+
     def fit(self, X, y):
         """
         Fit the regression coefficients via maximum likelihood.
diff --git a/numpy_ml/tests/test_linear_regression.py b/numpy_ml/tests/test_linear_regression.py
index 3ccd0f7..d5ea586 100644
--- a/numpy_ml/tests/test_linear_regression.py
+++ b/numpy_ml/tests/test_linear_regression.py
@@ -1,3 +1,4 @@
+# flake8: noqa
 import numpy as np
 
 from sklearn.linear_model import LinearRegression as LinearRegressionGold
@@ -12,7 +13,7 @@ def test_linear_regression(N=10):
 
     i = 1
     while i < N + 1:
-        train_samples = np.random.randint(1, 30)
+        train_samples = np.random.randint(2, 30)
         update_samples = np.random.randint(1, 30)
         n_samples = train_samples + update_samples
 
@@ -37,15 +38,21 @@ def test_linear_regression(N=10):
         lr = LinearRegression(fit_intercept=fit_intercept)
         lr.fit(X_train, y_train)
 
+        do_single_sample_update = np.random.choice([True, False])
+
         # ...then update our model on the examples (X_update, y_update)
-        for x_new, y_new in zip(X_update, y_update):
-            lr.update(x_new, y_new)
+        if do_single_sample_update:
+            for x_new, y_new in zip(X_update, y_update):
+                lr.update(x_new, y_new)
+        else:
+            lr.update(X_update, y_update)
 
         # check that model predictions match
-        np.testing.assert_almost_equal(lr.predict(X), lr_gold.predict(X))
+        np.testing.assert_almost_equal(lr.predict(X), lr_gold.predict(X), decimal=5)
 
         # check that model coefficients match
         beta = lr.beta.T[:, 1:] if fit_intercept else lr.beta.T
-        np.testing.assert_almost_equal(beta, lr_gold.coef_)
+        np.testing.assert_almost_equal(beta, lr_gold.coef_, decimal=6)
+        print("\tPASSED")
 
         i += 1
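
The sketch below is not part of the patch; it is a standalone check of the block (Woodbury) update that `_update2D` performs, written against plain NumPy with synthetic data. The variable names (`X_old`, `X_new`, `beta`, `S_inv`) are illustrative only, and the intercept column is omitted for brevity.

# Standalone sketch: verify that a Woodbury block update of the inverse
# covariance and of the coefficients matches a full batch refit.
# Synthetic data; no intercept column, unlike LinearRegression(fit_intercept=True).
import numpy as np

np.random.seed(0)
X_old = np.random.randn(20, 3)   # 20 "already seen" examples, 3 features
y_old = np.random.randn(20, 1)
X_new = np.random.randn(5, 3)    # a block of 5 new examples
y_new = np.random.randn(5, 1)

# initial fit via the normal equations: beta = (X^T X)^{-1} X^T y
S_inv = np.linalg.pinv(X_old.T @ X_old)   # inverse covariance
beta = S_inv @ X_old.T @ y_old

# Woodbury identity: fold the new block into S_inv without re-inverting X^T X
I = np.eye(X_new.shape[0])
S_inv -= S_inv @ X_new.T @ np.linalg.pinv(I + X_new @ S_inv @ X_new.T) @ X_new @ S_inv

# recursive least-squares coefficient update for the block
beta += S_inv @ X_new.T @ (y_new - X_new @ beta)

# reference: refit from scratch on all examples
X_all, y_all = np.vstack([X_old, X_new]), np.vstack([y_old, y_new])
beta_batch = np.linalg.pinv(X_all.T @ X_all) @ X_all.T @ y_all

np.testing.assert_almost_equal(beta, beta_batch, decimal=6)
print("Woodbury block update matches the batch refit")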