Skip to content

Commit

Permalink
Fix mlflow#825: add matplotlib to fix missing module error under dock…
Browse files Browse the repository at this point in the history
…er image (mlflow#827)

* add matplotlib to fix missing module error under docker

* restored prior version

* Initial check-in

* adapted to MLProject structure

* Split example into platform-specific subdirectories

* Add README explaining platform differences

* README links

* readme link fix

* Revert formatting changes to Java README
  • Loading branch information
jimthompson5802 authored and marcusrehm committed Feb 18, 2019
1 parent 748d1f7 commit 96b493c
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 1 deletion.
3 changes: 3 additions & 0 deletions examples/sklearn_elasticnet_diabetes/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Scikit-learn ElasticNet Diabetes Example

This example trains an ElasticNet regression model for predicting diabetes progression. The example uses [matplotlib](https://matplotlib.org/), which requires different Python dependencies for Linux and OSX. The [linux](linux) and [osx](osx) subdirectories include appropriate MLflow projects for each respective platform.
10 changes: 10 additions & 0 deletions examples/sklearn_elasticnet_diabetes/linux/MLproject
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# MLflow project definition for the Linux variant of the example.
name: tutorial

# Conda environment used to run the entry points below.
conda_env: conda.yaml

entry_points:
  main:
    # Both parameters are substituted into the command string by name.
    parameters:
      alpha: {type: float, default: 0.01}
      l1_ratio: {type: float, default: 0.1}
    command: "python train_diabetes.py {alpha} {l1_ratio}"
12 changes: 12 additions & 0 deletions examples/sklearn_elasticnet_diabetes/linux/conda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Conda environment for the Linux variant of the example.
name: tutorial
channels:
  - defaults
dependencies:
  - cloudpickle=0.6.1
  - python=3.6
  - numpy=1.14.3
  - matplotlib=3.0.2
  - pandas=0.22.0
  - scikit-learn=0.19.1
  # Packages listed under "pip" are installed with pip, not conda.
  - pip:
    - mlflow
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,4 @@ def eval_metrics(actual, pred):
plt.close(fig)

# Log artifacts (output files)
mlflow.log_artifact("ElasticNet-paths.png")
mlflow.log_artifact("ElasticNet-paths.png")
10 changes: 10 additions & 0 deletions examples/sklearn_elasticnet_diabetes/osx/MLproject
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# MLflow project definition for the OSX variant of the example.
name: tutorial

# Conda environment used to run the entry points below.
conda_env: conda.yaml

entry_points:
  main:
    # Both parameters are substituted into the command string by name.
    parameters:
      alpha: {type: float, default: 0.01}
      l1_ratio: {type: float, default: 0.1}
    # Launched with pythonw (not python) on this platform.
    command: "pythonw train_diabetes.py {alpha} {l1_ratio}"
13 changes: 13 additions & 0 deletions examples/sklearn_elasticnet_diabetes/osx/conda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Conda environment for the OSX variant of the example.
name: tutorial
channels:
  - defaults
dependencies:
  - cloudpickle=0.6.1
  - python=3.6
  - numpy=1.14.3
  - matplotlib=3.0.2
  - pandas=0.22.0
  - scikit-learn=0.19.1
  # Extra dependency of the OSX variant; the MLproject command runs pythonw.
  - python.app
  # Packages listed under "pip" are installed with pip, not conda.
  - pip:
    - mlflow
125 changes: 125 additions & 0 deletions examples/sklearn_elasticnet_diabetes/osx/train_diabetes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#
# train_diabetes.py
#
# MLflow model using ElasticNet (sklearn) and Plots ElasticNet Descent Paths
#
# Uses the sklearn Diabetes dataset to predict diabetes progression using ElasticNet
# The predicted "progression" column is a quantitative measure of disease progression one year after baseline
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html
# Combines the above with the Lasso Coordinate Descent Path Plot
# http://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_coordinate_descent_path.html
# Original author: Alexandre Gramfort <alexandre.gramfort@inria.fr>; License: BSD 3 clause
#
# Usage (this is the OSX variant; its MLproject entry point launches it with pythonw):
# pythonw train_diabetes.py 0.01 0.01
# pythonw train_diabetes.py 0.01 0.75
# pythonw train_diabetes.py 0.01 1.0
#

import os
import warnings
import sys

import pandas as pd
import numpy as np
from itertools import cycle
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import lasso_path, enet_path
from sklearn import datasets

# Fetch the sklearn Diabetes dataset: X holds the features, y the target
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Assemble one pandas DataFrame with the features followed by the target,
# which is stored under the column name 'progression'
Y = y[:, np.newaxis]  # target reshaped into a single column
d = np.hstack((X, Y))
cols = [*diabetes.feature_names, 'progression']
data = pd.DataFrame(d, columns=cols)


# Import mlflow
import mlflow
import mlflow.sklearn


# Evaluate metrics
def eval_metrics(actual, pred):
    """Score predictions against the true values.

    Returns a ``(rmse, mae, r2)`` tuple: the square root of the mean
    squared error, the mean absolute error, and the R2 score, each
    computed from ``actual`` and ``pred``.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)



if __name__ == "__main__":
    # Script entry point: train an ElasticNet model on the diabetes data,
    # log its parameters/metrics/model to MLflow, then plot the ElasticNet
    # regularization path and log the image as an artifact.
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "progression" which is a quantitative measure
    # of disease progression one year after baseline
    train_x = train.drop(["progression"], axis=1)
    test_x = test.drop(["progression"], axis=1)
    train_y = train[["progression"]]
    test_y = test[["progression"]]

    # Hyperparameters are positional command-line arguments; each falls back
    # to 0.05 when it is not supplied.
    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.05
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.05

    # Run ElasticNet
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    predicted_qualities = lr.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    # Print out ElasticNet model metrics
    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print(" RMSE: %s" % rmse)
    print(" MAE: %s" % mae)
    print(" R2: %s" % r2)

    # Log mlflow attributes for mlflow UI
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    mlflow.sklearn.log_model(lr, "model")

    # Compute paths
    eps = 5e-3  # the smaller it is the longer is the path

    print("Computing regularization path using the elastic net.")
    # The path is computed on the full dataset (X, y), not the train split.
    alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=l1_ratio, fit_intercept=False)

    # Display results. Figure 1 becomes the current figure, so the pyplot
    # calls below draw on it (the first plot call creates its axes).
    fig = plt.figure(1)

    # One dashed line per row of coefs_enet; colors repeat once exhausted.
    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_enet = -np.log10(alphas_enet)
    for coef_e, c in zip(coefs_enet, colors):
        plt.plot(neg_log_alphas_enet, coef_e, linestyle='--', c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    title = 'ElasticNet Path by alpha for l1_ratio = ' + str(l1_ratio)
    plt.title(title)
    plt.axis('tight')

    # Save figures (written to the current working directory)
    paths_png = "ElasticNet-paths.png"
    fig.savefig(paths_png)

    # Close plot
    plt.close(fig)

    # Log artifacts (output files)
    mlflow.log_artifact(paths_png)

0 comments on commit 96b493c

Please sign in to comment.