
Commit

Merge branch 'master' of https://github.com/ddbourgin/numpy-ml into master
ddbourgin committed Jun 1, 2021
2 parents 84f65b9 + 7c210a6 commit 470763e
Showing 3 changed files with 168 additions and 18 deletions.
2 changes: 1 addition & 1 deletion numpy_ml/linear_models/README.md
@@ -1,7 +1,7 @@
# Linear Models
The `lm.py` module implements:

1. [OLS linear regression](https://en.wikipedia.org/wiki/Ordinary_least_squares) with maximum likelihood parameter estimates via the normal equation.
1. [OLS linear regression](https://en.wikipedia.org/wiki/Ordinary_least_squares) with maximum likelihood parameter estimates via the normal equation, supporting both batch fitting and online (recursive least-squares) updates (see the usage sketch below).
2. [Ridge regression / Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)
with maximum likelihood parameter estimates via the normal equation.
2. [Logistic regression](https://en.wikipedia.org/wiki/Logistic_regression) with maximum likelihood parameter estimates via gradient descent.
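For context, here is a minimal usage sketch of the batch-then-online workflow described above, based on the `LinearRegression` API added in `lm.py` below (illustrative only, not part of this commit):

```python
import numpy as np

from numpy_ml.linear_models.lm import LinearRegression

# toy data: 50 examples, 3 features, 1 target dimension
X, y = np.random.randn(50, 3), np.random.randn(50, 1)

lr = LinearRegression(fit_intercept=True)
lr.fit(X[:40], y[:40])             # batch fit via the normal equation

for x_new, y_new in zip(X[40:], y[40:]):
    lr.update(x_new, y_new)        # online (recursive least-squares) updates

y_hat = lr.predict(X)              # predictions from the updated coefficients
```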
126 changes: 109 additions & 17 deletions numpy_ml/linear_models/lm.py
@@ -11,23 +11,113 @@ def __init__(self, fit_intercept=True):
Notes
-----
Given data matrix *X* and target vector *y*, the maximum-likelihood estimate
for the regression coefficients, :math:`\\beta`, is:
Given data matrix **X** and target vector **y**, the maximum-likelihood
estimate for the regression coefficients, :math:`\beta`, is:
.. math::
\hat{\beta} =
\left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}
\hat{\beta} = \Sigma^{-1} \mathbf{X}^\top \mathbf{y}
where :math:`\Sigma^{-1} = (\mathbf{X}^\top \mathbf{X})^{-1}`.
Parameters
----------
fit_intercept : bool
Whether to fit an additional intercept term in addition to the
model coefficients. Default is True.
Whether to fit an intercept term in addition to the model
coefficients. Default is True.
"""
self.beta = None
self.sigma_inv = None
self.fit_intercept = fit_intercept

self._is_fit = False

def update(self, X, y):
r"""
Incrementally update the least-squares coefficients for a set of new
examples.
Notes
-----
The recursive least-squares algorithm [1]_ [2]_ is used to efficiently
update the regression parameters as new examples become available. For
a single new example :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the
parameter updates are
.. math::
\beta_{t+1} = \left( \mathbf{X}_{1:t}^\top \mathbf{X}_{1:t} +
\mathbf{x}_{t+1} \mathbf{x}_{t+1}^\top \right)^{-1}
\left( \mathbf{X}_{1:t}^\top \mathbf{Y}_{1:t} +
\mathbf{x}_{t+1}^\top \mathbf{y}_{t+1} \right)
where :math:`\beta_{t+1}` are the updated regression coefficients and
:math:`\mathbf{X}_{1:t}`, :math:`\mathbf{Y}_{1:t}` are the matrices of
examples and targets observed from timesteps 1 through *t*.
In the single-example case, the RLS algorithm uses the Sherman-Morrison
formula [3]_ to avoid re-inverting the covariance matrix on each new
update. In the multi-example case (i.e., where :math:`\mathbf{X}_{t+1}`
and :math:`\mathbf{y}_{t+1}` are matrices of `N` examples each), we use
the generalized Woodbury matrix identity [4]_ to update the inverse
covariance. This comes at a performance cost, but is still more
performant than doing multiple single-example updates if *N* is large.
References
----------
.. [1] Gauss, C. F. (1821). _Theoria combinationis observationum
erroribus minimis obnoxiae_. Werke, 4. Göttingen.
.. [2] https://en.wikipedia.org/wiki/Recursive_least_squares_filter
.. [3] https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
.. [4] https://en.wikipedia.org/wiki/Woodbury_matrix_identity
Parameters
----------
X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
A dataset consisting of `N` examples, each of dimension `M`
y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
The targets for each of the `N` examples in `X`, where each target
has dimension `K`
"""
if not self._is_fit:
raise RuntimeError("You must call the `fit` method before calling `update`")

X, y = np.atleast_2d(X), np.atleast_2d(y)

X1, Y1 = X.shape[0], y.shape[0]
self._update1D(X, y) if X1 == Y1 == 1 else self._update2D(X, y)

def _update1D(self, x, y):
"""Sherman-Morrison update for a single example"""
beta, S_inv = self.beta, self.sigma_inv

# convert x to a design vector if we're fitting an intercept
if self.fit_intercept:
x = np.c_[1, x]

# update the inverse of the covariance matrix via Sherman-Morrison
S_inv -= (S_inv @ x.T @ x @ S_inv) / (1 + x @ S_inv @ x.T)

# update the model coefficients
beta += S_inv @ x.T @ (y - x @ beta)
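As a quick numerical sanity check of the Sherman-Morrison step above, the rank-one update of the inverse agrees with direct re-inversion (a standalone NumPy sketch, not part of this commit):

```python
import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(4, 4)
S = A @ A.T + np.eye(4)            # well-conditioned stand-in for X^T X
S_inv = np.linalg.inv(S)
x = rng.randn(1, 4)                # one new example as a row vector

# Sherman-Morrison rank-one update of the inverse
S_inv_sm = S_inv - (S_inv @ x.T @ x @ S_inv) / (1 + x @ S_inv @ x.T)

# compare against inverting the updated matrix from scratch
assert np.allclose(S_inv_sm, np.linalg.inv(S + x.T @ x))
```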

def _update2D(self, X, y):
"""Woodbury update for multiple examples"""
beta, S_inv = self.beta, self.sigma_inv

# convert X to a design matrix if we're fitting an intercept
if self.fit_intercept:
X = np.c_[np.ones(X.shape[0]), X]

I = np.eye(X.shape[0])

# update the inverse of the covariance matrix via Woodbury identity
S_inv -= S_inv @ X.T @ np.linalg.pinv(I + X @ S_inv @ X.T) @ X @ S_inv

# update the model coefficients
beta += S_inv @ X.T @ (y - X @ beta)
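Likewise, a short check of the Woodbury step above for a block of new examples (standalone NumPy sketch, not part of this commit):

```python
import numpy as np

rng = np.random.RandomState(1)
A = rng.randn(4, 4)
S = A @ A.T + np.eye(4)            # stand-in for the current X^T X
S_inv = np.linalg.inv(S)
X_new = rng.randn(3, 4)            # a block of N = 3 new examples

I_N = np.eye(X_new.shape[0])
# Woodbury update of the inverse for a rank-N correction
S_inv_wb = S_inv - S_inv @ X_new.T @ np.linalg.inv(I_N + X_new @ S_inv @ X_new.T) @ X_new @ S_inv

# compare against inverting the updated matrix from scratch
assert np.allclose(S_inv_wb, np.linalg.inv(S + X_new.T @ X_new))
```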

def fit(self, X, y):
"""
Fit the regression coefficients via maximum likelihood.
@@ -44,8 +134,10 @@ def fit(self, X, y):
if self.fit_intercept:
X = np.c_[np.ones(X.shape[0]), X]

pseudo_inverse = np.linalg.inv(X.T @ X) @ X.T
self.beta = np.dot(pseudo_inverse, y)
self.sigma_inv = np.linalg.pinv(X.T @ X)
self.beta = np.atleast_2d(self.sigma_inv @ X.T @ y)

self._is_fit = True
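For reference, a short sketch (not part of this commit) showing that the normal-equation estimate computed in `fit` matches NumPy's least-squares solver:

```python
import numpy as np

rng = np.random.RandomState(2)
X = np.c_[np.ones(20), rng.randn(20, 3)]   # design matrix with an intercept column
y = rng.randn(20, 2)

beta_normal = np.linalg.pinv(X.T @ X) @ X.T @ y        # normal equation, as in `fit`
beta_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)     # NumPy's least-squares solver

assert np.allclose(beta_normal, beta_lstsq)
```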

def predict(self, X):
"""
@@ -166,22 +258,22 @@ def __init__(self, penalty="l2", gamma=0, fit_intercept=True):
\left(
\sum_{i=0}^N y_i \log(\hat{y}_i) +
(1-y_i) \log(1-\hat{y}_i)
\right) - R(\mathbf{b}, \gamma)
\right]
where
.. math::
R(\mathbf{b}, \gamma) = \left\{
\begin{array}{lr}
\frac{\gamma}{2} ||\mathbf{b}||_2^2 & :\texttt{ penalty = 'l2'}\\
\gamma ||\mathbf{b}||_1 & :\texttt{ penalty = 'l1'}
\end{array}
\right.
is a regularization penalty, :math:`\gamma` is a regularization weight,
`N` is the number of examples in **y**, and **b** is the vector of model
coefficients.
Parameters
@@ -251,10 +343,10 @@ def _NLL(self, X, y, y_pred):
\right]
"""
N, M = X.shape
beta, gamma = self.beta, self.gamma
order = 2 if self.penalty == "l2" else 1
norm_beta = np.linalg.norm(beta, ord=order)

nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
penalty = (gamma / 2) * norm_beta ** 2 if order == 2 else gamma * norm_beta
return (penalty + nll) / N
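To make the penalized objective concrete, a tiny worked example of the computation in `_NLL` (standalone sketch with made-up values, assuming an `'l2'` penalty; not part of this commit):

```python
import numpy as np

y = np.array([1, 0, 1, 1])                   # binary targets
y_pred = np.array([0.9, 0.2, 0.7, 0.6])      # predicted probabilities
beta, gamma = np.array([0.5, -1.0]), 0.1     # coefficients and penalty weight

nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
penalty = (gamma / 2) * np.linalg.norm(beta, ord=2) ** 2
print((penalty + nll) / y.shape[0])          # per-example penalized NLL (≈ 0.31 here)
```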
58 changes: 58 additions & 0 deletions numpy_ml/tests/test_linear_regression.py
@@ -0,0 +1,58 @@
# flake8: noqa
import numpy as np

from sklearn.linear_model import LinearRegression as LinearRegressionGold

from numpy_ml.linear_models.lm import LinearRegression
from numpy_ml.utils.testing import random_tensor


def test_linear_regression(N=10):
np.random.seed(12345)
N = np.inf if N is None else N

i = 1
while i < N + 1:
train_samples = np.random.randint(2, 30)
update_samples = np.random.randint(1, 30)
n_samples = train_samples + update_samples

# ensure n_feats < train_samples, otherwise multiple solutions are
# possible
n_feats = np.random.randint(1, train_samples)
target_dim = np.random.randint(1, 10)

fit_intercept = np.random.choice([True, False])

X = random_tensor((n_samples, n_feats), standardize=True)
y = random_tensor((n_samples, target_dim), standardize=True)

X_train, X_update = X[:train_samples], X[train_samples:]
y_train, y_update = y[:train_samples], y[train_samples:]

# Fit gold standard model on the entire dataset
lr_gold = LinearRegressionGold(fit_intercept=fit_intercept, normalize=False)
lr_gold.fit(X, y)

# Fit our model on just (X_train, y_train)...
lr = LinearRegression(fit_intercept=fit_intercept)
lr.fit(X_train, y_train)

do_single_sample_update = np.random.choice([True, False])

# ...then update our model on the examples (X_update, y_update)
if do_single_sample_update:
for x_new, y_new in zip(X_update, y_update):
lr.update(x_new, y_new)
else:
lr.update(X_update, y_update)

# check that model predictions match
np.testing.assert_almost_equal(lr.predict(X), lr_gold.predict(X), decimal=5)

# check that model coefficients match
beta = lr.beta.T[:, 1:] if fit_intercept else lr.beta.T
np.testing.assert_almost_equal(beta, lr_gold.coef_, decimal=6)

print("\tPASSED")
i += 1
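The new test can also be exercised directly (assuming `scikit-learn` is installed for the reference model):

```python
from numpy_ml.tests.test_linear_regression import test_linear_regression

test_linear_regression(N=5)   # compare against scikit-learn on 5 random problems
```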
