forked from mlflow/mlflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix mlflow#825: add matplotlib to fix missing module error under docker image (mlflow#827)
* add matplotlib to fix missing module error under docker * restored prior version * Initial check-in * adapted to MLProject structure * Split example into platform-specific subdirectories * Add README explaining platform differences * README links * readme link fix * Revert formatting changes to java readme
- Loading branch information
1 parent
748d1f7
commit 96b493c
Showing
7 changed files
with
174 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Scikit-learn ElasticNet Diabetes Example | ||
|
||
This example trains an ElasticNet regression model for predicting diabetes progression. The example uses [matplotlib](https://matplotlib.org/), which requires different Python dependencies for Linux and OSX. The [linux](linux) and [osx](osx) subdirectories include appropriate MLflow projects for each respective platform. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
name: tutorial | ||
|
||
conda_env: conda.yaml | ||
|
||
entry_points: | ||
main: | ||
parameters: | ||
alpha: {type: float, default: 0.01} | ||
l1_ratio: {type: float, default: 0.1} | ||
command: "python train_diabetes.py {alpha} {l1_ratio}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
name: tutorial | ||
channels: | ||
- defaults | ||
dependencies: | ||
- cloudpickle=0.6.1 | ||
- python=3.6 | ||
- numpy=1.14.3 | ||
- matplotlib=3.0.2 | ||
- pandas=0.22.0 | ||
- scikit-learn=0.19.1 | ||
- pip: | ||
- mlflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
name: tutorial | ||
|
||
conda_env: conda.yaml | ||
|
||
entry_points: | ||
main: | ||
parameters: | ||
alpha: {type: float, default: 0.01} | ||
l1_ratio: {type: float, default: 0.1} | ||
command: "pythonw train_diabetes.py {alpha} {l1_ratio}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
name: tutorial | ||
channels: | ||
- defaults | ||
dependencies: | ||
- cloudpickle=0.6.1 | ||
- python=3.6 | ||
- numpy=1.14.3 | ||
- matplotlib=3.0.2 | ||
- pandas=0.22.0 | ||
- scikit-learn=0.19.1 | ||
- python.app | ||
- pip: | ||
- mlflow |
125 changes: 125 additions & 0 deletions
125
examples/sklearn_elasticnet_diabetes/osx/train_diabetes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
# | ||
# train_diabetes.py | ||
# | ||
# MLflow model using ElasticNet (sklearn) and Plots ElasticNet Descent Paths | ||
# | ||
# Uses the sklearn Diabetes dataset to predict diabetes progression using ElasticNet | ||
# The predicted "progression" column is a quantitative measure of disease progression one year after baseline | ||
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html | ||
# Combines the above with the Lasso Coordinate Descent Path Plot | ||
# http://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_coordinate_descent_path.html | ||
# Original author: Alexandre Gramfort <alexandre.gramfort@inria.fr>; License: BSD 3 clause | ||
# | ||
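# Usage (under the OSX conda environment, substitute pythonw for python, as the MLproject entry point does): | ||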
# python train_diabetes.py 0.01 0.01 | ||
# python train_diabetes.py 0.01 0.75 | ||
# python train_diabetes.py 0.01 1.0 | ||
# | ||
|
||
import os | ||
import warnings | ||
import sys | ||
|
||
import pandas as pd | ||
import numpy as np | ||
from itertools import cycle | ||
import matplotlib.pyplot as plt | ||
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | ||
from sklearn.model_selection import train_test_split | ||
from sklearn.linear_model import ElasticNet | ||
from sklearn.linear_model import lasso_path, enet_path | ||
from sklearn import datasets | ||
|
||
# Fetch the diabetes dataset that ships with scikit-learn
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Build a single pandas DataFrame: the feature columns followed by the
# target, stored as a trailing "progression" column
Y = y.reshape(-1, 1)  # target as a column vector so it can be stacked beside X
d = np.hstack((X, Y))
cols = diabetes.feature_names + ['progression']
data = pd.DataFrame(d, columns=cols)
|
||
|
||
# Import mlflow | ||
import mlflow | ||
import mlflow.sklearn | ||
|
||
|
||
# Evaluate metrics | ||
def eval_metrics(actual, pred):
    """Score regression predictions against the observed values.

    Args:
        actual: Observed target values.
        pred: Predicted values, aligned with ``actual``.

    Returns:
        A tuple ``(rmse, mae, r2)``: root mean squared error, mean absolute
        error and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: train an ElasticNet model on the diabetes data,
    # log its parameters, metrics and model to MLflow, then plot the
    # ElasticNet regularization path and log the image as an artifact.
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "progression" which is a quantitative measure
    # of disease progression one year after baseline
    train_x = train.drop(["progression"], axis=1)
    test_x = test.drop(["progression"], axis=1)
    train_y = train[["progression"]]
    test_y = test[["progression"]]

    # Hyperparameters are optional positional command-line arguments;
    # each falls back to 0.05 when it is not supplied.
    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.05
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.05

    # Run ElasticNet
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    predicted_progression = lr.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_progression)

    # Print out ElasticNet model metrics
    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print(" RMSE: %s" % rmse)
    print(" MAE: %s" % mae)
    print(" R2: %s" % r2)

    # Log mlflow attributes for mlflow UI
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    mlflow.sklearn.log_model(lr, "model")

    # Compute paths
    eps = 5e-3  # the smaller it is the longer is the path

    print("Computing regularization path using the elastic net.")
    # NOTE: the path is computed on the full dataset (X, y), not on the
    # training split used for the model above.
    alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=l1_ratio, fit_intercept=False)

    # Display results: draw through the Axes object rather than the implicit
    # pyplot state, so the figure being saved is the one being drawn on.
    fig = plt.figure(1)
    ax = fig.gca()

    # One dashed line per row of coefs_enet; the color list repeats as needed.
    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_enet = -np.log10(alphas_enet)
    for coef_e, c in zip(coefs_enet, colors):
        ax.plot(neg_log_alphas_enet, coef_e, linestyle='--', c=c)

    ax.set_xlabel('-Log(alpha)')
    ax.set_ylabel('coefficients')
    title = 'ElasticNet Path by alpha for l1_ratio = ' + str(l1_ratio)
    ax.set_title(title)
    ax.axis('tight')

    # Save figures
    fig.savefig("ElasticNet-paths.png")

    # Close plot
    plt.close(fig)

    # Log artifacts (output files)
    mlflow.log_artifact("ElasticNet-paths.png")