Skip to content

Commit

Permalink
fix: scaler documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasczz committed Jul 7, 2022
1 parent c82d3a4 commit d80a8ec
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 23 deletions.
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,31 @@
</p>

## 💈 Installation

```shell
pip install river-torch
```

You can install the latest development version from GitHub like so:

```shell
pip install git+https://github.com/online-ml/river-torch --upgrade
```

Or, through SSH:

```shell
pip install git+ssh://git@github.com/online-ml/river-torch.git --upgrade
```


## 🍫 Quickstart

We build the development of neural networks on top of the <a href="https://www.riverml.xyz">river API</a> and follow river's design principles.
The following example creates a simple MLP architecture based on PyTorch and incrementally predicts and trains on the website phishing dataset.
For further examples, check out the <a href="https://online-ml.github.io/river-torch">documentation</a>.

### Classification

```python
>>> from river import datasets
>>> from river import metrics
Expand Down Expand Up @@ -81,7 +87,7 @@ Accuracy: 0.8304
```python
>>> import math
>>> from river import datasets, metrics
>>> from river_torch.anomaly import AutoEncoder
>>> from river_torch.anomaly import Autoencoder
>>> from river_torch.utils import get_activation_fn
>>> from torch import manual_seed, nn

Expand Down Expand Up @@ -113,7 +119,7 @@ Accuracy: 0.8304
>>> encoder_fn = get_encoder
>>> decoder_fn = get_decoder

>>> model = AutoEncoder(encoder_fn=encoder_fn,decoder_fn=decoder_fn, lr=0.01)
>>> model = Autoencoder(encoder_fn=encoder_fn,decoder_fn=decoder_fn, lr=0.01)

>>> for x,y in dataset:
... score = model.score_one(x)
Expand All @@ -123,6 +129,7 @@ Accuracy: 0.8304
```

## 🏫 Affiliations

<p align="center">
<img src="https://upload.wikimedia.org/wikipedia/de/thumb/4/44/Fzi_logo.svg/1200px-Fzi_logo.svg.png?raw=true" alt="FZI Logo" height="200"/>
</p>
8 changes: 4 additions & 4 deletions docs/examples/example_ae.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb Cell 1'\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb#ch0000000?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mriver\u001b[39;00m \u001b[39mimport\u001b[39;00m datasets, metrics\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb#ch0000000?line=1'>2</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mriver_torch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39manomaly\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mnn_builder\u001b[39;00m \u001b[39mimport\u001b[39;00m get_fc_encoder, get_fc_decoder\n\u001b[1;32m <a href='vscode-notebook-cell:/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb#ch0000000?line=2'>3</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mriver_torch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39manomaly\u001b[39;00m \u001b[39mimport\u001b[39;00m AutoEncoder\n",
"\u001b[1;32m/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb Cell 1'\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb#ch0000000?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mriver\u001b[39;00m \u001b[39mimport\u001b[39;00m datasets, metrics\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb#ch0000000?line=1'>2</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mriver_torch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39manomaly\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mnn_builder\u001b[39;00m \u001b[39mimport\u001b[39;00m get_fc_encoder, get_fc_decoder\n\u001b[1;32m <a href='vscode-notebook-cell:/home/lucascazzonelli/Documents/Research/river-torch/docs/examples/example_ae.ipynb#ch0000000?line=2'>3</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mriver_torch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39manomaly\u001b[39;00m \u001b[39mimport\u001b[39;00m Autoencoder\n",
"File \u001b[0;32m~/Documents/Research/river-torch/river_torch/__init__.py:1\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m \u001b[39mimport\u001b[39;00m anomaly, classification, regression, utils\n\u001b[1;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m__version__\u001b[39;00m \u001b[39mimport\u001b[39;00m __version__ \u001b[39m# noqa: F401\u001b[39;00m\n\u001b[1;32m 4\u001b[0m __all__ \u001b[39m=\u001b[39m [\u001b[39m\"\u001b[39m\u001b[39manomaly\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mclassification\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mregression\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mutils\u001b[39m\u001b[39m\"\u001b[39m]\n",
"File \u001b[0;32m~/Documents/Research/river-torch/river_torch/anomaly/__init__.py:7\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mrolling_ae\u001b[39;00m \u001b[39mimport\u001b[39;00m RollingWindowAutoencoder\n\u001b[1;32m 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbase\u001b[39;00m \u001b[39mimport\u001b[39;00m AutoEncoder\n\u001b[0;32m----> 7\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mscaler\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[1;32m 8\u001b[0m StandardScaler,\n\u001b[1;32m 9\u001b[0m MeanScaler,\n\u001b[1;32m 10\u001b[0m MinMaxScaler,\n\u001b[1;32m 11\u001b[0m RollingStandardScaler,\n\u001b[1;32m 12\u001b[0m AdaptiveStandardScaler,\n\u001b[1;32m 13\u001b[0m RollingMinMaxScaler,\n\u001b[1;32m 14\u001b[0m RollingMeanScaler,\n\u001b[1;32m 15\u001b[0m AdaptiveMeanScaler,\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 18\u001b[0m __all__ \u001b[39m=\u001b[39m [\n\u001b[1;32m 19\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAutoEncoder\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 20\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mVariationalAutoencoder\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAdaptiveMeanScaler\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 32\u001b[0m ]\n",
"File \u001b[0;32m~/Documents/Research/river-torch/river_torch/anomaly/__init__.py:7\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mrolling_ae\u001b[39;00m \u001b[39mimport\u001b[39;00m RollingWindowAutoencoder\n\u001b[1;32m 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbase\u001b[39;00m \u001b[39mimport\u001b[39;00m Autoencoder\n\u001b[0;32m----> 7\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mscaler\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[1;32m 8\u001b[0m StandardScaler,\n\u001b[1;32m 9\u001b[0m MeanScaler,\n\u001b[1;32m 10\u001b[0m MinMaxScaler,\n\u001b[1;32m 11\u001b[0m RollingStandardScaler,\n\u001b[1;32m 12\u001b[0m AdaptiveStandardScaler,\n\u001b[1;32m 13\u001b[0m RollingMinMaxScaler,\n\u001b[1;32m 14\u001b[0m RollingMeanScaler,\n\u001b[1;32m 15\u001b[0m AdaptiveMeanScaler,\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 18\u001b[0m __all__ \u001b[39m=\u001b[39m [\n\u001b[1;32m 19\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAutoencoder\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 20\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mVariationalAutoencoder\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAdaptiveMeanScaler\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 32\u001b[0m ]\n",
"File \u001b[0;32m~/Documents/Research/river-torch/river_torch/anomaly/scaler.py:105\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 100\u001b[0m score \u001b[39m=\u001b[39m raw_score \u001b[39m/\u001b[39m mean\n\u001b[1;32m 102\u001b[0m \u001b[39mreturn\u001b[39;00m score\n\u001b[0;32m--> 105\u001b[0m \u001b[39mclass\u001b[39;00m \u001b[39mMinMaxScaler\u001b[39;00m(AnomalyScaler):\n\u001b[1;32m 106\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, anomaly_detector: anomaly\u001b[39m.\u001b[39mAnomalyDetector):\n\u001b[1;32m 107\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(anomaly_detector)\n",
"File \u001b[0;32m~/Documents/Research/river-torch/river_torch/anomaly/scaler.py:106\u001b[0m, in \u001b[0;36mMinMaxScaler\u001b[0;34m()\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[39mclass\u001b[39;00m \u001b[39mMinMaxScaler\u001b[39;00m(AnomalyScaler):\n\u001b[0;32m--> 106\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, anomaly_detector: anomaly\u001b[39m.\u001b[39;49mAnomalyDetector):\n\u001b[1;32m 107\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(anomaly_detector)\n\u001b[1;32m 108\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmin \u001b[39m=\u001b[39m Min()\n",
"\u001b[0;31mAttributeError\u001b[0m: partially initialized module 'river_torch.anomaly' has no attribute 'AnomalyDetector' (most likely due to a circular import)"
Expand All @@ -29,7 +29,7 @@
"source": [
"from river import datasets, metrics\n",
"from river_torch.anomaly.nn_builder import get_fc_encoder, get_fc_decoder\n",
"from river_torch.anomaly import AutoEncoder"
"from river_torch.anomaly import Autoencoder"
]
},
{
Expand Down Expand Up @@ -62,7 +62,7 @@
"dataset = datasets.CreditCard().take(5000)\n",
"metric = metrics.ROCAUC()\n",
"\n",
"model = AutoEncoder(encoder_fn=encoder_fn,decoder_fn=decoder_fn, lr=0.01, n_features=5)\n",
"model = Autoencoder(encoder_fn=encoder_fn,decoder_fn=decoder_fn, lr=0.01, n_features=5)\n",
"\n",
"for x,y in dataset:\n",
" score = model.score_one(x)\n",
Expand Down
6 changes: 3 additions & 3 deletions river_torch/anomaly/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .base import AutoEncoder
from .base import Autoencoder
from .probability_weighted_ae import ProbabilityWeightedAutoencoder
from .variational_ae import VariationalAutoencoder
from .rolling_ae import RollingWindowAutoencoder

from .base import AutoEncoder, AnomalyScaler
from .base import Autoencoder, AnomalyScaler
from .scaler import (
StandardScaler,
MeanScaler,
Expand All @@ -16,7 +16,7 @@
)

__all__ = [
"AutoEncoder",
"Autoencoder",
"VariationalAutoencoder",
"RollingWindowAutoencoder",
"WindowedStandardizer",
Expand Down
28 changes: 27 additions & 1 deletion river_torch/anomaly/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from river_torch.utils import dict2tensor, get_loss_fn, get_optimizer_fn


class AutoEncoder(anomaly.AnomalyDetector, nn.Module):
class Autoencoder(anomaly.AnomalyDetector, nn.Module):
"""
Base Auto Encoder
----------
Expand Down Expand Up @@ -182,9 +182,35 @@ def configure_optimizers(self):


class AnomalyScaler(base.Wrapper, anomaly.AnomalyDetector):
"""AnomalyScaler is a wrapper around an anomaly detector that scales the output of the model.
Parameters
----------
anomaly_detector
"""

def __init__(self, anomaly_detector: anomaly.AnomalyDetector):
self.anomaly_detector = anomaly_detector

@classmethod
def _unit_test_params(self):
yield {"anomaly_detector": anomaly.HalfSpaceTrees()}

@classmethod
def _unit_test_skips(self):
"""Indicates which checks to skip during unit testing.
Most estimators pass the full test suite. However, in some cases, some estimators might not
be able to pass certain checks.
"""
return {
"check_pickling",
"check_shuffle_features_no_impact",
"check_emerging_features",
"check_disappearing_features",
"check_predict_proba_one",
"check_predict_proba_one_binary",
}

@property
def _wrapped_model(self):
return self.anomaly_detector
Expand Down
5 changes: 3 additions & 2 deletions river_torch/anomaly/probability_weighted_ae.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from river_torch.anomaly import base
from river_torch.utils import dict2tensor

class ProbabilityWeightedAutoencoder(base.AutoEncoder):

class ProbabilityWeightedAutoencoder(base.Autoencoder):
"""
A probability weighted auto encoder
----------
Expand Down Expand Up @@ -72,4 +73,4 @@ def learn_one(self, x):
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return self
return self
5 changes: 3 additions & 2 deletions river_torch/anomaly/rolling_ae.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from river_torch.anomaly import base
from river_torch.utils import dict2tensor

class RollingWindowAutoencoder(base.AutoEncoder):

class RollingWindowAutoencoder(base.Autoencoder):
"""
A rolling window auto encoder
----------
Expand Down Expand Up @@ -62,4 +63,4 @@ def learn_one(self, x):
if len(self._x_window) == self.window_size:
x = torch.concat(list(self._x_window.values))
self._learn_batch(x=x)
return self
return self
72 changes: 65 additions & 7 deletions river_torch/anomaly/scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,22 @@


class StandardScaler(AnomalyScaler):
"""Wrapper around an anomaly detector that standardizes the model's output using incremental mean and variance metrics.
Parameters
----------
anomaly_detector
with_std : bool
"""

def __init__(self, anomaly_detector, with_std=True):
super().__init__(anomaly_detector)
self.mean = Mean()
self.sq_mean = Mean()
self.with_std = with_std

def score_one(self, *args) -> float:
"""Return calibrated anomaly score based on raw score provided by the wrapped anomaly detector.
"""Return scaled anomaly score based on raw score provided by the wrapped anomaly detector.
A high score is indicative of an anomaly. A low score corresponds to a normal observation.
Parameters
Expand All @@ -33,7 +41,7 @@ def score_one(self, *args) -> float:
return score

def score_many(self, *args):
"""Return calibrated anomaly scores based on raw scores provided by the wrapped anomaly detector.
"""Return scaled anomaly scores based on raw scores provided by the wrapped anomaly detector.
A high score is indicative of an anomaly. A low score corresponds to a normal observation.
Parameters
Expand All @@ -57,12 +65,18 @@ def score_many(self, *args):


class MeanScaler(AnomalyScaler):
""" Wrapper around an anomaly detector that scales the model's output by the incremental mean of previous scores.
Parameters
----------
anomaly_detector
"""
def __init__(self, anomaly_detector):
super().__init__(anomaly_detector=anomaly_detector)
self.mean = Mean()

def score_one(self, *args) -> float:
"""Return calibrated anomaly score based on raw score provided by the wrapped anomaly detector.
"""Return scaled anomaly score based on raw score provided by the wrapped anomaly detector.
A high score is indicative of an anomaly. A low score corresponds to a normal observation.
Parameters
Expand All @@ -81,7 +95,7 @@ def score_one(self, *args) -> float:
return score

def score_many(self, *args) -> float:
"""Return calibrated anomaly scores based on raw scores provided by the wrapped anomaly detector.
"""Return scaled anomaly scores based on raw scores provided by the wrapped anomaly detector.
A high score is indicative of an anomaly. A low score corresponds to a normal observation.
Parameters
Expand All @@ -101,13 +115,19 @@ def score_many(self, *args) -> float:


class MinMaxScaler(AnomalyScaler):
"""Wrapper around an anomaly detector that scales the model's output to $[0, 1]$ using running min and max metrics.
Parameters
----------
anomaly_detector
"""
def __init__(self, anomaly_detector):
super().__init__(anomaly_detector)
self.min = Min()
self.max = Max()

def score_one(self, *args) -> float:
"""Return calibrated anomaly score based on raw score provided by the wrapped anomaly detector.
"""Return scaled anomaly score based on raw score provided by the wrapped anomaly detector.
A high score is indicative of an anomaly. A low score corresponds to a normal observation.
Parameters
Expand All @@ -127,7 +147,7 @@ def score_one(self, *args) -> float:
return score

def score_many(self, *args) -> float:
"""Return calibrated anomaly score based on raw score provided by the wrapped anomaly detector.
"""Return scaled anomaly score based on raw score provided by the wrapped anomaly detector.
A high score is indicative of an anomaly. A low score corresponds to a normal observation.
Parameters
Expand All @@ -152,6 +172,15 @@ def score_many(self, *args) -> float:


class RollingStandardScaler(StandardScaler):
"""Wrapper around an anomaly detector that standardizes the model's output using rolling mean and variance metrics.
Parameters
----------
anomaly_detector
window_size
with_std : bool
"""

def __init__(self, anomaly_detector, window_size=250, with_std=True):
super().__init__(anomaly_detector=anomaly_detector)
self.window_size = window_size
Expand All @@ -161,15 +190,30 @@ def __init__(self, anomaly_detector, window_size=250, with_std=True):


class AdaptiveStandardScaler(StandardScaler):
    """Wrapper around an anomaly detector that standardizes the model's output using exponential running mean and variance metrics.

    Parameters
    ----------
    anomaly_detector
        The anomaly detector whose raw scores are standardized.
    alpha
        Value forwarded as ``alpha`` to the ``EWMean`` statistics that
        track the scores.
    with_std
        When false, no squared-score statistic is kept (``sq_mean`` is
        ``None``).
    """

    def __init__(self, anomaly_detector, alpha=0.3, with_std=True):
        super().__init__(anomaly_detector=anomaly_detector)
        # Store the parameter under its real name only; the misspelled
        # ``apha`` attribute was a leftover typo.
        self.alpha = alpha
        # Replace the statistics created by the parent constructor with
        # exponentially weighted ones.
        self.mean = EWMean(alpha=alpha)
        self.sq_mean = EWMean(alpha=alpha) if with_std else None
        self.with_std = with_std


class RollingMinMaxScaler(MinMaxScaler):
"""Wrapper around an anomaly detector that scales the model's output to $[0, 1]$ using rolling min and max metrics.
Parameters
----------
anomaly_detector
window_size
"""
def __init__(self, anomaly_detector, window_size=250):
super().__init__(anomaly_detector=anomaly_detector)
self.window_size = window_size
Expand All @@ -178,13 +222,27 @@ def __init__(self, anomaly_detector, window_size=250):


class RollingMeanScaler(MeanScaler):
    """Wrapper around an anomaly detector that scales the model's output by the rolling mean of previous scores.

    Parameters
    ----------
    anomaly_detector
        The anomaly detector whose raw scores are scaled.
    window_size
        Value forwarded as ``window_size`` to the ``RollingMean`` statistic.
    """

    def __init__(self, anomaly_detector, window_size=250):
        # The parent constructor runs first; its ``mean`` statistic is then
        # swapped for a windowed one.
        super().__init__(anomaly_detector)
        self.window_size = window_size
        self.mean = RollingMean(window_size=self.window_size)


class AdaptiveMeanScaler(MeanScaler):
""" Wrapper around an anomaly detector that scales the model's output by the exponential running mean of previous scores.
Parameters
----------
anomaly_detector
alpha
"""
def __init__(self, anomaly_detector, alpha=0.3):
super().__init__(anomaly_detector)
self.alpha = alpha
Expand Down
2 changes: 1 addition & 1 deletion river_torch/anomaly/variational_ae.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from river_torch.utils import dict2tensor


class VariationalAutoencoder(base.AutoEncoder):
class VariationalAutoencoder(base.Autoencoder):
"""
A variational auto encoder
----------
Expand Down

0 comments on commit d80a8ec

Please sign in to comment.